Commit 0eb8cbf6 authored by Niels Möller

* x86_64/aes-decrypt-internal.asm: Rearrange register allocation.

Put SA--SD in %eax--%edx, so the second byte can be accessed as
%ah--%dh. TD is not needed, since SD can be reused. Use the register
this frees up (%r13) for the outer loop counter, getting it off the stack.
* x86_64/aes-encrypt-internal.asm: Likewise.

* x86_64/aes.m4 (HREG, MOVE_HREG): New macros.
(XREG): Fixed bug in handling of %r8 and %r9.
(AES_ROUND): Use MOVE_HREG.

Rev: nettle/x86_64/aes-decrypt-internal.asm:1.2
Rev: nettle/x86_64/aes-encrypt-internal.asm:1.9
Rev: nettle/x86_64/aes.m4:1.6
parent b78b93a9
......@@ -24,28 +24,25 @@ C Register usage:
C AES state, use two of them
define(<SA>,<%eax>)
define(<SB>,<%ebx>)
define(<SC>,<%ebp>)
define(<SD>,<%r9d>)
define(<SC>,<%ecx>)
define(<SD>,<%edx>)
define(<TA>,<%r10d>)
define(<TB>,<%r11d>)
define(<TC>,<%r12d>)
define(<TD>,<%r13d>)
define(<CTX>, <%rdi>)
define(<TABLE>, <%rsi>)
define(<LENGTH>,<%edx>) C Length is only 32 bits
define(<DST>, <%rcx>)
define(<PARAM_LENGTH>,<%edx>) C Length is only 32 bits
define(<PARAM_DST>, <%rcx>)
define(<SRC>, <%r8>)
define(<DST>, <%r9>)
define(<KEY>,<%r14>)
define(<COUNT>, <%r15d>)
define(<BLOCK_COUNT>, <%r13d>)
C Put the outer loop counter on the stack, and reuse the LENGTH
C register as a temporary.
define(<FRAME_COUNT>, <(%rsp)>)
define(<TMP>,<%rdx>)
define(<TMP>,<%rbp>)
.file "aes-decrypt-internal.asm"
......@@ -56,7 +53,7 @@ define(<TMP>,<%rdx>)
.text
ALIGN(4)
PROLOGUE(_nettle_aes_decrypt)
test LENGTH, LENGTH
test PARAM_LENGTH, PARAM_LENGTH
jz .Lend
C save all registers that need to be saved
......@@ -67,11 +64,9 @@ PROLOGUE(_nettle_aes_decrypt)
push %r14
push %r15
C Allocates 4 bytes more than we need, for nicer alignment.
sub $8, %rsp
shrl $4, LENGTH
movl LENGTH, FRAME_COUNT
mov PARAM_DST, DST
movl PARAM_LENGTH, BLOCK_COUNT
shrl $4, BLOCK_COUNT
.Lblock_loop:
mov CTX,KEY
......@@ -95,19 +90,19 @@ PROLOGUE(_nettle_aes_decrypt)
AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
xorl 8(KEY),TC
AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
xorl 12(KEY),TD
AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
xorl 12(KEY),SD
AES_ROUND(TABLE, TA,TD,TC,TB, SA, TMP)
AES_ROUND(TABLE, TA,SD,TC,TB, SA, TMP)
xorl 16(KEY), SA
AES_ROUND(TABLE, TB,TA,TD,TC, SB, TMP)
AES_ROUND(TABLE, TB,TA,SD,TC, SB, TMP)
xorl 20(KEY),SB
AES_ROUND(TABLE, TC,TB,TA,TD, SC, TMP)
AES_ROUND(TABLE, TC,TB,TA,SD, SC, TMP)
xorl 24(KEY),SC
AES_ROUND(TABLE, TD,TC,TB,TA, SD, TMP)
AES_ROUND(TABLE, SD,TC,TB,TA, SD, TMP)
xorl 28(KEY),SD
add $32,KEY C point to next key
......@@ -125,13 +120,13 @@ PROLOGUE(_nettle_aes_decrypt)
AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
xorl 8(KEY),TC
AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
xorl 12(KEY),TD
AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
xorl 12(KEY),SD
AES_FINAL_ROUND(TA,TD,TC,TB, TABLE, SA, TMP)
AES_FINAL_ROUND(TB,TA,TD,TC, TABLE, SB, TMP)
AES_FINAL_ROUND(TC,TB,TA,TD, TABLE, SC, TMP)
AES_FINAL_ROUND(TD,TC,TB,TA, TABLE, SD, TMP)
AES_FINAL_ROUND(TA,SD,TC,TB, TABLE, SA, TMP)
AES_FINAL_ROUND(TB,TA,SD,TC, TABLE, SB, TMP)
AES_FINAL_ROUND(TC,TB,TA,SD, TABLE, SC, TMP)
AES_FINAL_ROUND(SD,TC,TB,TA, TABLE, SD, TMP)
C Inverse S-box substitution
mov $3, COUNT
......@@ -145,11 +140,10 @@ PROLOGUE(_nettle_aes_decrypt)
AES_STORE(SA,SB,SC,SD, KEY, DST)
add $16, DST
decl FRAME_COUNT
decl BLOCK_COUNT
jnz .Lblock_loop
add $8, %rsp
pop %r15
pop %r14
pop %r13
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2001, 2002, 2005, 2008 Rafael R. Sevilla, Niels Möller
......@@ -25,28 +24,25 @@ C Register usage:
C AES state, use two of them
define(<SA>,<%eax>)
define(<SB>,<%ebx>)
define(<SC>,<%ebp>)
define(<SD>,<%r9d>)
define(<SC>,<%ecx>)
define(<SD>,<%edx>)
define(<TA>,<%r10d>)
define(<TB>,<%r11d>)
define(<TC>,<%r12d>)
define(<TD>,<%r13d>)
define(<CTX>, <%rdi>)
define(<TABLE>, <%rsi>)
define(<LENGTH>,<%edx>) C Length is only 32 bits
define(<DST>, <%rcx>)
define(<PARAM_LENGTH>,<%edx>) C Length is only 32 bits
define(<PARAM_DST>, <%rcx>)
define(<SRC>, <%r8>)
define(<DST>, <%r9>)
define(<KEY>,<%r14>)
define(<COUNT>, <%r15d>)
define(<BLOCK_COUNT>, <%r13d>)
C Put the outer loop counter on the stack, and reuse the LENGTH
C register as a temporary.
define(<FRAME_COUNT>, <(%rsp)>)
define(<TMP>,<%rdx>)
define(<TMP>,<%rbp>)
.file "aes-encrypt-internal.asm"
......@@ -57,7 +53,7 @@ define(<TMP>,<%rdx>)
.text
ALIGN(4)
PROLOGUE(_nettle_aes_encrypt)
test LENGTH, LENGTH
test PARAM_LENGTH, PARAM_LENGTH
jz .Lend
C save all registers that need to be saved
......@@ -68,11 +64,9 @@ PROLOGUE(_nettle_aes_encrypt)
push %r14
push %r15
C Allocates 4 bytes more than we need, for nicer alignment.
sub $8, %rsp
shrl $4, LENGTH
movl LENGTH, FRAME_COUNT
mov PARAM_DST, DST
movl PARAM_LENGTH, BLOCK_COUNT
shrl $4, BLOCK_COUNT
.Lblock_loop:
mov CTX,KEY
......@@ -96,19 +90,19 @@ PROLOGUE(_nettle_aes_encrypt)
AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
xorl 8(KEY),TC
AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
xorl 12(KEY),TD
AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
xorl 12(KEY),SD
AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMP)
AES_ROUND(TABLE, TA,TB,TC,SD, SA, TMP)
xorl 16(KEY), SA
AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMP)
AES_ROUND(TABLE, TB,TC,SD,TA, SB, TMP)
xorl 20(KEY),SB
AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMP)
AES_ROUND(TABLE, TC,SD,TA,TB, SC, TMP)
xorl 24(KEY),SC
AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMP)
AES_ROUND(TABLE, SD,TA,TB,TC, SD, TMP)
xorl 28(KEY),SD
add $32,KEY C point to next key
......@@ -126,13 +120,13 @@ PROLOGUE(_nettle_aes_encrypt)
AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
xorl 8(KEY),TC
AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
xorl 12(KEY),TD
AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
xorl 12(KEY),SD
AES_FINAL_ROUND(TA,TB,TC,TD, TABLE, SA, TMP)
AES_FINAL_ROUND(TB,TC,TD,TA, TABLE, SB, TMP)
AES_FINAL_ROUND(TC,TD,TA,TB, TABLE, SC, TMP)
AES_FINAL_ROUND(TD,TA,TB,TC, TABLE, SD, TMP)
AES_FINAL_ROUND(TA,TB,TC,SD, TABLE, SA, TMP)
AES_FINAL_ROUND(TB,TC,SD,TA, TABLE, SB, TMP)
AES_FINAL_ROUND(TC,SD,TA,TB, TABLE, SC, TMP)
AES_FINAL_ROUND(SD,TA,TB,TC, TABLE, SD, TMP)
C S-box substitution
mov $3, COUNT
......@@ -146,11 +140,10 @@ PROLOGUE(_nettle_aes_encrypt)
AES_STORE(SA,SB,SC,SD, KEY, DST)
add $16, DST
decl FRAME_COUNT
decl BLOCK_COUNT
jnz .Lblock_loop
add $8, %rsp
pop %r15
pop %r14
pop %r13
......
......@@ -17,6 +17,28 @@ define(<LREG>,<ifelse(
$1, %r14d, %r14b,
$1, %r15d, %r15b)>)dnl
dnl HREG(reg)
dnl Maps a 32-bit register name to the name of its second-byte register
dnl (bits 8-15): %eax to %ah, %ebx to %bh, %ecx to %ch, %edx to %dh.
dnl Only these four registers are handled; any other argument expands to
dnl the bare word "error".
define(<HREG>,<ifelse(
$1, %eax, %ah,
$1, %ebx, %bh,
$1, %ecx, %ch,
$1, %edx, %dh,
error)>)
dnl MOVE_HREG(src, dst)
dnl Emits code that places the second byte (bits 8-15) of the 32-bit
dnl register src, zero-extended, in dst.
dnl When src is %eax, %ebx, %ecx or %edx, a single movzb from the
dnl matching high-byte register (%ah, %bh, %ch, %dh) is emitted.
dnl For any other src the fallback is a three-instruction sequence:
dnl copy src to dst, shift dst right by 8, then mask dst with 0xff.
dnl NOTE(review): the high-byte registers cannot be encoded together with
dnl a REX prefix, so dst presumably must not be one of %r8d-%r15d when
dnl the movzb form is selected - confirm against the callers.
define(<MOVE_HREG>, <ifelse(
$1, %eax, <movzb %ah, $2
>,
$1, %ebx, <movzb %bh, $2
>,
$1, %ecx, <movzb %ch, $2
>,
$1, %edx, <movzb %dh, $2
>,
<movl $1, $2
shr <$>8, $2
and <$>0xff, $2
>)>)
define(<XREG>,<ifelse(
$1, %rax, %eax,
$1, %rbx, %ebx,
......@@ -26,8 +48,8 @@ define(<XREG>,<ifelse(
$1, %rdi, %edi,
$1, %rbp, %ebp,
$1, %rsp, %esp,
$1, %r8d, %r8d,
$1, %r9d, %r9d,
$1, %r8, %r8d,
$1, %r9, %r9d,
$1, %r10,%r10d,
$1, %r11,%r11d,
$1, %r12,%r12d,
......@@ -72,9 +94,7 @@ dnl Computes one word of the AES round. Leaves result in $6.
define(<AES_ROUND>, <
movzb LREG($2), $7
movl AES_TABLE0 ($1, $7, 4),$6
movl $3, XREG($7)
shr <$>8,$7
and <$>0xff,$7
MOVE_HREG($3, XREG($7))
xorl AES_TABLE1 ($1, $7, 4),$6
movl $4,XREG($7)
shr <$>16,$7
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment