From f3aebef10130047b8f0f2d8cb3c7a4c559b75392 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Fri, 6 Feb 2004 09:58:19 +0100
Subject: [PATCH] Assembler implementation of sha1_compress. (Not yet working).

Rev: src/nettle/x86/sha1-compress.asm:1.1
---
 x86/sha1-compress.asm | 248 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
 create mode 100644 x86/sha1-compress.asm

diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm
new file mode 100644
index 00000000..5fbd7a06
--- /dev/null
+++ b/x86/sha1-compress.asm
@@ -0,0 +1,248 @@
+C nettle, low-level cryptographics library
+C 
+C Copyright (C) 2004, Niels Möller
+C  
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C 
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C 
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+C Register usage. SA-SE carry the five state words through all 80 rounds.
+define(<SA>,<%eax>)
+define(<SB>,<%ebx>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
+define(<SE>,<%ebp>)
+define(<DATA>,<%esi>)		C  pointer to the 16 data words, reused as W storage
+define(<TMP>,<%edi>)		C  scratch; EXPAND and the F macros leave results here
+C Extra temporary needed by F3, a stack slot reserved in the prologue. Can we get rid of it?
+define(<TMP2>,<(%esp)>)
+C Round constants, passed as the k argument of round and added to e.
+define(<K1>, <<$>0x5A827999>)		C  Rounds  0-19
+define(<K2>, <<$>0x6ED9EBA1>)		C  Rounds 20-39
+define(<K3>, <<$>0x8F1BBCDC>)		C  Rounds 40-59
+define(<K4>, <<$>0xCA62C1D6>)		C  Rounds 60-79
+C EXPAND(i) is the SHA-1 message expansion
+C
+C   W[i] = (W[i - 16] ^ W[i - 14] ^ W[i - 8] ^ W[i - 3]) <<< 1
+C
+C where W[i] is stored in DATA[i & 15], so the four words read are slots
+C i, i+2, i+8 and i+13 (mod 16). The result is stored back in W[i] and left
+C in TMP, the only register clobbered. NOEXPAND(i) is the W[i] memory operand.
+define(<EXPAND>, <
+	movl	eval(4 *        ($1 & 15)) (DATA), TMP
+	xorl	eval(4 * (($1 +  2) & 15)) (DATA), TMP
+	xorl	eval(4 * (($1 +  8) & 15)) (DATA), TMP
+	xorl	eval(4 * (($1 + 13) & 15)) (DATA), TMP
+	roll	<$>1, TMP
+	movl	TMP, eval(4 * ($1 & 15)) (DATA)>)dnl
+define(<NOEXPAND>, <eval(4 * ($1 & 15)) (DATA)>)dnl
+C The f functions,
+C
+C  f1(x,y,z) = z ^ (x & (y ^ z))
+C  f2(x,y,z) = x ^ y ^ z
+C  f3(x,y,z) = (x & y) | (z & (x | y))
+C  f4 = f2
+C
+C The macro Fk(x,y,z) computes fk(x,y,z).
+C Result is left in TMP. May clobber the TMP2 stack slot.
+C F1(x,y,z): TMP = z ^ (x & (y ^ z)), i.e. the bits of y where x is set, else z.
+define(<F1>, <
+	movl	$2, TMP
+	xorl	$3, TMP
+	andl	$1, TMP
+	xorl	$3, TMP>)dnl
+C F2(x,y,z): TMP = x ^ y ^ z. The operands are folded in starting from z.
+define(<F2>, <
+	movl	$3, TMP
+	xorl	$1, TMP
+	xorl	$2, TMP>)dnl
+C F3(x,y,z): TMP = (x & y) | (z & (x | y)). We are one register short, so the
+C first term computed is parked in the TMP2 stack slot. Can the stack be avoided?
+define(<F3>, <
+	movl	$1, TMP
+	orl	$2, TMP
+	andl	$3, TMP
+	movl	TMP, TMP2
+	movl	$1, TMP
+	andl	$2, TMP
+	orl	TMP2, TMP>)dnl
+C One sha1 round. In terms of the five working variables it computes
+C
+C   a' = e + (a <<< 5) + f(b, c, d) + k + w
+C   b' = a
+C   c' = b <<< 30
+C   d' = c
+C   e' = d
+C
+C with <<< meaning rotation. Rather than moving values around, each call
+C site passes the registers in rotated order, so only two of them change:
+C
+C   e += (a <<< 5) + f(b, c, d) + k + w
+C   b <<<= 30
+C
+C round(a,b,c,d,e,f,k,w). w is added first since it may be in TMP, which
+C both f and the rotation of a overwrite. b is rotated only after f has read it.
+define(<round>, <
+	addl	$8, $5
+	$6($2,$3,$4)
+	addl	TMP, $5
+	roll	<$>30, $2
+	movl	$1, TMP
+	roll	<$>5, TMP
+	addl	TMP, $5
+	addl	$7, $5>)dnl
+
+	.file "sha1-compress.asm"
+
+	C sha1_compress(uint32_t *state, uint32_t *data)
+	C Runs 80 rounds on the five state words; the words at data are overwritten by EXPAND.
+	.text
+	.align 16
+	.globl _nettle_sha1_compress
+	.type  _nettle_sha1_compress,@function
+_nettle_sha1_compress:
+	C save all registers that need to be saved
+	C (offsets noted below are relative to %esp after the final subl)
+	pushl	%ebx		C  16(%esp)
+	pushl	%ebp		C  12(%esp)
+	pushl	%esi		C  8(%esp)
+	pushl	%edi		C  4(%esp)
+	subl	$4, %esp	C  0(%esp) = TMP2
+	C The return address is now at 20(%esp), followed by the two arguments.
+	C load the state vector
+	movl	24(%esp),TMP
+	movl	(TMP),   SA
+	movl	4(TMP),  SB
+	movl	8(TMP),  SC
+	movl	12(TMP), SD
+	movl	16(TMP), SE
+
+	movl	28(%esp), DATA
+	C Rounds 0-19: F1 and K1. The first 16 use the data words as they are.
+	round(SA, SB, SC, SD, SE, <F1>, K1, NOEXPAND( 0))
+	round(SE, SA, SB, SC, SD, <F1>, K1, NOEXPAND( 1))
+	round(SD, SE, SA, SB, SC, <F1>, K1, NOEXPAND( 2))
+	round(SC, SD, SE, SA, SB, <F1>, K1, NOEXPAND( 3))
+	round(SB, SC, SD, SE, SA, <F1>, K1, NOEXPAND( 4))
+
+	round(SA, SB, SC, SD, SE, <F1>, K1, NOEXPAND( 5))
+	round(SE, SA, SB, SC, SD, <F1>, K1, NOEXPAND( 6))
+	round(SD, SE, SA, SB, SC, <F1>, K1, NOEXPAND( 7))
+	round(SC, SD, SE, SA, SB, <F1>, K1, NOEXPAND( 8))
+	round(SB, SC, SD, SE, SA, <F1>, K1, NOEXPAND( 9))
+
+	round(SA, SB, SC, SD, SE, <F1>, K1, NOEXPAND(10))
+	round(SE, SA, SB, SC, SD, <F1>, K1, NOEXPAND(11))
+	round(SD, SE, SA, SB, SC, <F1>, K1, NOEXPAND(12))
+	round(SC, SD, SE, SA, SB, <F1>, K1, NOEXPAND(13))
+	round(SB, SC, SD, SE, SA, <F1>, K1, NOEXPAND(14))
+
+		   round(SA, SB, SC, SD, SE, <F1>, K1, NOEXPAND(15))
+	EXPAND(16) round(SE, SA, SB, SC, SD, <F1>, K1, TMP)
+	EXPAND(17) round(SD, SE, SA, SB, SC, <F1>, K1, TMP)
+	EXPAND(18) round(SC, SD, SE, SA, SB, <F1>, K1, TMP)
+	EXPAND(19) round(SB, SC, SD, SE, SA, <F1>, K1, TMP)
+
+	C Rounds 20-39: F2 and K2. Each word now comes from EXPAND, which leaves it in TMP.
+	EXPAND(20) round(SA, SB, SC, SD, SE, <F2>, K2, TMP)
+	EXPAND(21) round(SE, SA, SB, SC, SD, <F2>, K2, TMP)
+	EXPAND(22) round(SD, SE, SA, SB, SC, <F2>, K2, TMP)
+	EXPAND(23) round(SC, SD, SE, SA, SB, <F2>, K2, TMP)
+	EXPAND(24) round(SB, SC, SD, SE, SA, <F2>, K2, TMP)
+
+	EXPAND(25) round(SA, SB, SC, SD, SE, <F2>, K2, TMP)
+	EXPAND(26) round(SE, SA, SB, SC, SD, <F2>, K2, TMP)
+	EXPAND(27) round(SD, SE, SA, SB, SC, <F2>, K2, TMP)
+	EXPAND(28) round(SC, SD, SE, SA, SB, <F2>, K2, TMP)
+	EXPAND(29) round(SB, SC, SD, SE, SA, <F2>, K2, TMP)
+
+	EXPAND(30) round(SA, SB, SC, SD, SE, <F2>, K2, TMP)
+	EXPAND(31) round(SE, SA, SB, SC, SD, <F2>, K2, TMP)
+	EXPAND(32) round(SD, SE, SA, SB, SC, <F2>, K2, TMP)
+	EXPAND(33) round(SC, SD, SE, SA, SB, <F2>, K2, TMP)
+	EXPAND(34) round(SB, SC, SD, SE, SA, <F2>, K2, TMP)
+
+	EXPAND(35) round(SA, SB, SC, SD, SE, <F2>, K2, TMP)
+	EXPAND(36) round(SE, SA, SB, SC, SD, <F2>, K2, TMP)
+	EXPAND(37) round(SD, SE, SA, SB, SC, <F2>, K2, TMP)
+	EXPAND(38) round(SC, SD, SE, SA, SB, <F2>, K2, TMP)
+	EXPAND(39) round(SB, SC, SD, SE, SA, <F2>, K2, TMP)
+
+	C Rounds 40-59: F3 and K3. F3 is the macro that uses the TMP2 stack slot.
+	EXPAND(40) round(SA, SB, SC, SD, SE, <F3>, K3, TMP)
+	EXPAND(41) round(SE, SA, SB, SC, SD, <F3>, K3, TMP)
+	EXPAND(42) round(SD, SE, SA, SB, SC, <F3>, K3, TMP)
+	EXPAND(43) round(SC, SD, SE, SA, SB, <F3>, K3, TMP)
+	EXPAND(44) round(SB, SC, SD, SE, SA, <F3>, K3, TMP)
+
+	EXPAND(45) round(SA, SB, SC, SD, SE, <F3>, K3, TMP)
+	EXPAND(46) round(SE, SA, SB, SC, SD, <F3>, K3, TMP)
+	EXPAND(47) round(SD, SE, SA, SB, SC, <F3>, K3, TMP)
+	EXPAND(48) round(SC, SD, SE, SA, SB, <F3>, K3, TMP)
+	EXPAND(49) round(SB, SC, SD, SE, SA, <F3>, K3, TMP)
+
+	EXPAND(50) round(SA, SB, SC, SD, SE, <F3>, K3, TMP)
+	EXPAND(51) round(SE, SA, SB, SC, SD, <F3>, K3, TMP)
+	EXPAND(52) round(SD, SE, SA, SB, SC, <F3>, K3, TMP)
+	EXPAND(53) round(SC, SD, SE, SA, SB, <F3>, K3, TMP)
+	EXPAND(54) round(SB, SC, SD, SE, SA, <F3>, K3, TMP)
+
+	EXPAND(55) round(SA, SB, SC, SD, SE, <F3>, K3, TMP)
+	EXPAND(56) round(SE, SA, SB, SC, SD, <F3>, K3, TMP)
+	EXPAND(57) round(SD, SE, SA, SB, SC, <F3>, K3, TMP)
+	EXPAND(58) round(SC, SD, SE, SA, SB, <F3>, K3, TMP)
+	EXPAND(59) round(SB, SC, SD, SE, SA, <F3>, K3, TMP)
+
+	C Rounds 60-79: F2 again (f4 = f2), with K4.
+	EXPAND(60) round(SA, SB, SC, SD, SE, <F2>, K4, TMP)
+	EXPAND(61) round(SE, SA, SB, SC, SD, <F2>, K4, TMP)
+	EXPAND(62) round(SD, SE, SA, SB, SC, <F2>, K4, TMP)
+	EXPAND(63) round(SC, SD, SE, SA, SB, <F2>, K4, TMP)
+	EXPAND(64) round(SB, SC, SD, SE, SA, <F2>, K4, TMP)
+
+	EXPAND(65) round(SA, SB, SC, SD, SE, <F2>, K4, TMP)
+	EXPAND(66) round(SE, SA, SB, SC, SD, <F2>, K4, TMP)
+	EXPAND(67) round(SD, SE, SA, SB, SC, <F2>, K4, TMP)
+	EXPAND(68) round(SC, SD, SE, SA, SB, <F2>, K4, TMP)
+	EXPAND(69) round(SB, SC, SD, SE, SA, <F2>, K4, TMP)
+
+	EXPAND(70) round(SA, SB, SC, SD, SE, <F2>, K4, TMP)
+	EXPAND(71) round(SE, SA, SB, SC, SD, <F2>, K4, TMP)
+	EXPAND(72) round(SD, SE, SA, SB, SC, <F2>, K4, TMP)
+	EXPAND(73) round(SC, SD, SE, SA, SB, <F2>, K4, TMP)
+	EXPAND(74) round(SB, SC, SD, SE, SA, <F2>, K4, TMP)
+
+	EXPAND(75) round(SA, SB, SC, SD, SE, <F2>, K4, TMP)
+	EXPAND(76) round(SE, SA, SB, SC, SD, <F2>, K4, TMP)
+	EXPAND(77) round(SD, SE, SA, SB, SC, <F2>, K4, TMP)
+	EXPAND(78) round(SC, SD, SE, SA, SB, <F2>, K4, TMP)
+	EXPAND(79) round(SB, SC, SD, SE, SA, <F2>, K4, TMP)
+
+	C Update the state vector. The argument order above repeats every 5 rounds, so SA-SE line up with words 0-4 again.
+	movl	24(%esp),TMP
+	addl	SA, (TMP) 
+	addl	SB, 4(TMP) 
+	addl	SC, 8(TMP) 
+	addl	SD, 12(TMP) 
+	addl	SE, 16(TMP)
+	C Drop the TMP2 slot and restore the saved registers in reverse push order.
+	addl	$4, %esp
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	popl	%ebx
+	ret
+
+.Leord:
+	.size	_nettle_sha1_compress,.Leord-_nettle_sha1_compress
-- 
GitLab