diff --git a/sparc/aes.asm b/sparc/aes.asm
index e7d007741952b306e5d74d70b1a06afa35a6ab74..fd952c5ab1d80384856085a7565fb6992651cbf6 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -66,19 +66,38 @@ define(t3, %o3)
 C AES_LOAD(i)
 C Get one word of input, XOR with first subkey, store in wtxt
 define(<AES_LOAD>, <
-	ldub	[src + $1], t0
-	ldub	[src + $1 + 1], t1
-	sll	t1, 8, t1
-	or	t0, t1, t0
-	ldub	[src + $1 + 2], t2
+	ldub	[src+3], t3	! fourth input byte, becomes bits 31..24
+	ldub	[src+2], t2
+	sll	t3, 24, t3
+	ldub	[src+1], t1
+	
 	sll	t2, 16, t2
-	or	t0, t2, t0
-	ldub	[src + $1 + 3], t1
-	sll	t1, 24, t1
-	or	t0, t1, t0
-	ld	[key + $1], t2
-	xor	t0, t2, t0
-	st	t0, [wtxt + $1]
+	or	t3, t2, t3
+	ldub	[src], t0	! first input byte is the low byte (little-endian assembly)
+	sll	t1, 8, t1
+	
+	! Get subkey
+	ld	[ctx + $1], t2	! byte offset comes from the macro argument, not a fixed 0
+	or	t3, t1, t3
+	or	t3, t0, t3
+	xor	t3, t2, t3
+	
+	st	t3, [wtxt+$1]	! store at the same byte offset as the subkey just used
+	add	src, 4, src	! advance the input pointer past the four bytes just read
+	
+	C ldub	[src + $1], t0
+	C ldub	[src + $1 + 1], t1
+	C sll	t1, 8, t1
+	C or	t0, t1, t0
+	C ldub	[src + $1 + 2], t2
+	C sll	t2, 16, t2
+	C or	t0, t2, t0
+	C ldub	[src + $1 + 3], t1
+	C sll	t1, 24, t1
+	C or	t0, t1, t0
+	C ld	[key + $1], t2
+	C xor	t0, t2, t0
+	C st	t0, [wtxt + $1]
 	>)dnl
 
 C AES_ROUND(i)
@@ -225,14 +244,12 @@ C .Lsource_loop:
 	sll	t1, 8, t1
 	
 	! Get subkey
-	ld	[src+%g2], t2
+	ld	[ctx + 0], t2
 	or	t3, t1, t3
 	or	t3, t0, t3
 	xor	t3, t2, t3
 	
-	C cmp	src, %g1
-	st	t3, [src+%g3]
-	C bleu	.Lsource_loop
+	st	t3, [wtxt+0]
 	add	src, 4, src
 
 	C i = 1
@@ -247,13 +264,13 @@ C .Lsource_loop:
 	sll	t1, 8, t1
 	
 	! Get subkey
-	ld	[src+%g2], t2
+	ld	[ctx + 4], t2
 	or	t3, t1, t3
 	or	t3, t0, t3
 	xor	t3, t2, t3
 	
 	C cmp	src, %g1
-	st	t3, [src+%g3]
+	st	t3, [wtxt + 4]
 	C bleu	.Lsource_loop
 	add	src, 4, src
 	C i = 2