diff --git a/sparc/aes.asm b/sparc/aes.asm
index ec1283af5e72f1839fdb3b429865775f254c1a27..63cdb31cfc15f6d02cb283aed5656d595d74fccd 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -103,7 +103,53 @@ define(<AES_ROUND>, <
 	xor	t0, t3, t0		! 0, 1, 2, 3
 	xor	t0, t1, t0
 	st	t0, [tmp + $1]>)
+
+C AES_FINAL_ROUND(i)
+C Compute one output word of the final round. The argument is a byte
+C offset (0 for the first word). Input is read from wtxt; the result,
+C xored with the word at key + i, is stored at dst one byte at a time,
+C least significant byte first, and dst is then advanced by 4.
+C
+C The ! comments mark which result byte j an instruction works on. Byte j
+C is an ldub from T indexed by one wtxt byte (located through the IDX1 and
+C IDX3 tables for j = 1, 3; at fixed offsets for j = 0, 2) and lives in tj.
+define(<AES_FINAL_ROUND>, <
+	ld	[IDX1+$1], t1		! 1
+	ldub	[wtxt+t1], t1		! 1
+
+	ld	[wtxt+$1], t0		! 0
+	! IDX2(j) = j XOR 2, which is XOR 8 on the byte offset
+	lduh	[wtxt+eval($1 ^ 8)], t2	! 2
+	and	t0, 255, t0		! 0
+	ld	[IDX3 + $1], t3		! 3
 	
+	and	t2, 255, t2		! 2
+	ldub	[T+t1], t1		! 1
+	ldub	[T+t0], t0		! 0
+	sll	t1, 8, t1		! 1
+
+	ldub	[wtxt+t3], t3		! 3
+	or	t0, t1, t0		! 0, 1
+	ldub	[T+t2], t2		! 2
+	ldub	[T+t3], t3		! 3
+
+	sll	t2, 16, t2		! 2
+	or	t0, t2, t0		! 0, 1, 2
+	ld	[key + $1], t2		! subkey word, reuses t2
+	sll	t3, 24, t3		! 3
+
+	or	t0, t3, t0		! 0, 1, 2, 3
+	xor	t0, t2, t0		! mix in the subkey word
+
+	srl	t0, 24, t3
+	srl	t0, 16, t2
+	srl	t0, 8, t1
+	stb	t1, [dst+1]		! bits 8-15
+
+	stb	t3, [dst+3]		! bits 24-31
+	stb	t2, [dst+2]		! bits 16-23
+	stb	t0, [dst]		! bits 0-7
+	add	dst, 4, dst>)
 	
 C The stack frame looks like
 C
@@ -200,42 +246,43 @@ _aes_crypt:
 	C Unrolled final loop begins
 
 	C i = 0
-	ld	[IDX1+0], t1 	! 1
-	ldub	[wtxt+t1], t1	! 1
-
-	ld	[wtxt+0], t0	! 0
-	! IDX2(j) = j XOR 2
-	lduh	[wtxt+8], t2	! 2
-	and	t0, 255, t0	! 0
-	ld	[IDX3 + 0], t3	! 3
-	
-	and	t2, 255, t2	! 2
-	ldub	[T+t1], t1	! 1
-	ldub	[T+t0], t0	! 0
-	sll	t1, 8, t1	! 1
-	
-	ldub	[wtxt+t3], t3	! 3
-	or	t0, t1, t0	! 0, 1
-	ldub	[T+t2], t2	! 2
-	ldub	[T+t3], t3	! 3
-	
-	sll	t2, 16, t2	! 2
-	or	t0, t2, t0	! 0, 1, 2
-	ld	[key + 0], t2
-	sll	t3, 24, t3	! 3
-	
-	or	t0, t3, t0	! 0, 1, 2, 3
-	xor	t0, t2, t0
-	
-	srl	t0, 24, t3
-	srl	t0, 16, t2
-	srl	t0, 8, t1
-	stb	t1, [dst+1]
-	
-	stb	t3, [dst+3]
-	stb	t2, [dst+2]
-	stb	t0, [dst]
-	add	dst, 4, dst
+	AES_FINAL_ROUND(0)
+C Open-coded sequence that the macro call above replaces, kept for reference:
+C 	ld	[IDX1+0], t1 	! 1
+C 	ldub	[wtxt+t1], t1	! 1
+C 	ld	[wtxt+0], t0	! 0
+C 	! IDX2(j) = j XOR 2
+C 	lduh	[wtxt+8], t2	! 2
+C 	and	t0, 255, t0	! 0
+C 	ld	[IDX3 + 0], t3	! 3
+C
+C 	and	t2, 255, t2	! 2
+C 	ldub	[T+t1], t1	! 1
+C 	ldub	[T+t0], t0	! 0
+C 	sll	t1, 8, t1	! 1
+C
+C 	ldub	[wtxt+t3], t3	! 3
+C 	or	t0, t1, t0	! 0, 1
+C 	ldub	[T+t2], t2	! 2
+C 	ldub	[T+t3], t3	! 3
+C
+C 	sll	t2, 16, t2	! 2
+C 	or	t0, t2, t0	! 0, 1, 2
+C 	ld	[key + 0], t2
+C 	sll	t3, 24, t3	! 3
+C
+C 	or	t0, t3, t0	! 0, 1, 2, 3
+C 	xor	t0, t2, t0
+C
+C 	srl	t0, 24, t3
+C 	srl	t0, 16, t2
+C 	srl	t0, 8, t1
+C 	stb	t1, [dst+1]
+C
+C 	stb	t3, [dst+3]
+C 	stb	t2, [dst+2]
+C 	stb	t0, [dst]
+C 	add	dst, 4, dst
 	
 	C i = 1
 	ld	[IDX1+4], t1 	! 1