From bbbf795b67f6e8fcc1cbda06e88de90de9ff72e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Mon, 15 Sep 2008 21:32:01 +0200
Subject: [PATCH] New file.

Rev: nettle/x86_64/aes-decrypt-internal.asm:1.1
---
 x86_64/aes-decrypt-internal.asm | 161 ++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 x86_64/aes-decrypt-internal.asm

diff --git a/x86_64/aes-decrypt-internal.asm b/x86_64/aes-decrypt-internal.asm
new file mode 100644
index 00000000..d3a48f26
--- /dev/null
+++ b/x86_64/aes-decrypt-internal.asm
@@ -0,0 +1,161 @@
+C nettle, low-level cryptographics library
+C 
+C Copyright (C) 2001, 2002, 2005, 2008 Rafael R. Sevilla, Niels M�ller
+C  
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C 
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C 
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+include_src(<x86_64/aes.m4>)
+
+C Register usage:
+
+C AES state, use two of them
+define(<SA>,<%eax>)
+define(<SB>,<%ebx>)
+define(<SC>,<%ebp>)
+define(<SD>,<%r9d>)
+
+define(<TA>,<%r10d>)
+define(<TB>,<%r11d>)
+define(<TC>,<%r12d>)
+define(<TD>,<%r13d>)
+
+define(<CTX>,	<%rdi>)
+define(<TABLE>,	<%rsi>)
+define(<LENGTH>,<%edx>)		C Length is only 32 bits
+define(<DST>,	<%rcx>)
+define(<SRC>,	<%r8>)
+
+define(<KEY>,<%r14>)
+define(<COUNT>,	<%r15d>)
+
+C Put the outer loop counter on the stack, and reuse the LENGTH
+C register as a temporary. 
+	
+define(<FRAME_COUNT>,	<(%rsp)>)
+define(<TMP>,<%rdx>)
+
+	.file "aes-decrypt-internal.asm"
+	
+	C _aes_decrypt(struct aes_context *ctx, 
+	C	       const struct aes_table *T,
+	C	       unsigned length, uint8_t *dst,
+	C	       uint8_t *src)
+	.text
+	ALIGN(4)
+PROLOGUE(_nettle_aes_decrypt)
+	test	LENGTH, LENGTH
+	jz	.Lend
+
+        C save all registers that need to be saved
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15	
+
+	C Allocates 4 bytes more than we need, for nicer alignment.
+	sub	$8, %rsp
+
+	shrl	$4, LENGTH
+	movl	LENGTH, FRAME_COUNT
+.Lblock_loop:
+	mov	CTX,KEY
+	
+	AES_LOAD(SA, SB, SC, SD, SRC, KEY)
+	add	$16, SRC	C Increment src pointer
+
+	C  get number of rounds to do from ctx struct	
+	movl	AES_NROUNDS (CTX), COUNT
+	shrl	$1, COUNT
+	subl	$1, COUNT
+
+	add	$16,KEY		C  point to next key
+	ALIGN(4)
+.Lround_loop:
+	AES_ROUND(TABLE, SA,SD,SC,SB, TA, TMP)
+	xorl	(KEY), TA
+
+	AES_ROUND(TABLE, SB,SA,SD,SC, TB, TMP)
+	xorl	4(KEY),TB
+
+	AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
+	xorl	8(KEY),TC
+
+	AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
+	xorl	12(KEY),TD
+
+	AES_ROUND(TABLE, TA,TD,TC,TB, SA, TMP)
+	xorl	16(KEY), SA
+
+	AES_ROUND(TABLE, TB,TA,TD,TC, SB, TMP)
+	xorl	20(KEY),SB
+
+	AES_ROUND(TABLE, TC,TB,TA,TD, SC, TMP)
+	xorl	24(KEY),SC
+
+	AES_ROUND(TABLE, TD,TC,TB,TA, SD, TMP)
+	xorl	28(KEY),SD
+	
+	add	$32,KEY	C  point to next key
+	decl	COUNT
+	jnz	.Lround_loop
+
+	C last two rounds
+
+	AES_ROUND(TABLE, SA,SD,SC,SB, TA, TMP)
+	xorl	(KEY), TA
+
+	AES_ROUND(TABLE, SB,SA,SD,SC, TB, TMP)
+	xorl	4(KEY),TB
+
+	AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
+	xorl	8(KEY),TC
+
+	AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
+	xorl	12(KEY),TD
+
+	AES_FINAL_ROUND(TA,TD,TC,TB, TABLE, SA, TMP)
+	AES_FINAL_ROUND(TB,TA,TD,TC, TABLE, SB, TMP)
+	AES_FINAL_ROUND(TC,TB,TA,TD, TABLE, SC, TMP)
+	AES_FINAL_ROUND(TD,TC,TB,TA, TABLE, SD, TMP)
+
+	C Inverse S-box substitution
+	mov	$3, COUNT
+.Lsubst:
+	AES_SUBST_BYTE(SA,SB,SC,SD, TABLE, TMP)
+
+	decl	COUNT
+	jnz	.Lsubst
+
+	C Add last subkey, and store decrypted data
+	AES_STORE(SA,SB,SC,SD, KEY, DST)
+	
+	add	$16, DST
+	decl	FRAME_COUNT
+
+	jnz	.Lblock_loop
+
+	add	$8, %rsp
+	pop	%r15	
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+.Lend:
+	ret
+EPILOGUE(_nettle_aes_decrypt)
-- 
GitLab