Skip to content
Snippets Groups Projects
Select Git revision
1 result Searching

COPYRIGHT

Blame
  • aes-encrypt-internal.asm 3.83 KiB
    C -*- mode: asm; asm-comment-char: ?C; -*-  
    C nettle, low-level cryptographics library
    C 
    C Copyright (C) 2002, 2005 Niels Möller
    C  
    C The nettle library is free software; you can redistribute it and/or modify
    C it under the terms of the GNU Lesser General Public License as published by
    C the Free Software Foundation; either version 2.1 of the License, or (at your
    C option) any later version.
    C 
    C The nettle library is distributed in the hope that it will be useful, but
    C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    C License for more details.
    C 
    C You should have received a copy of the GNU Lesser General Public License
    C along with the nettle library; see the file COPYING.LIB.  If not, write to
    C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
    C MA 02111-1307, USA.
    
    
    C	Arguments
    C	Incoming argument registers, in the order of the parameters of
    C	_aes_encrypt (ctx, T, length, dst, src).
    define(<CTX>,	<%i0>)
    define(<T>,	<%i1>)
    define(<LENGTH>,<%i2>)
    define(<DST>,	<%i3>)
    define(<SRC>,	<%i4>)
    
    C	AES state, two copies for unrolling
    C	The round loop alternates between the copies: one round reads
    C	W0-W3 and writes X0-X3, the next reads X0-X3 and writes W0-W3.
    
    define(<W0>,	<%l0>)
    define(<W1>,	<%l1>)
    define(<W2>,	<%l2>)
    define(<W3>,	<%l3>)
    
    define(<X0>,	<%l4>)
    define(<X1>,	<%l5>)
    define(<X2>,	<%l6>)
    define(<X3>,	<%l7>)
    
    C	%o0-%o3 are used for loop invariants T0-T3
    C	NOTE(review): T0-T3 are not defined in this file; presumably they
    C	come from a shared m4 include. Confirm.
    C	KEY is the subkey pointer that is advanced as rounds are consumed,
    C	ROUND is the counter of the round loop.
    define(<KEY>,	<%o4>)
    define(<ROUND>, <%o5>)
    
    C %g1 and %g2 are TMP1 and TMP2
    C NOTE(review): TMP1 and TMP2 are not referenced directly in this file;
    C presumably they are scratch registers of the AES_* macros. Confirm.
    		
    
    C Registers %g1-%g3 and %o0 - %o5 are free to use.
    
    C The sparc32 stack frame looks like
    C
    C %fp -   4: OS-dependent link field
    C %fp -   8: OS-dependent link field
    C %fp -  24: tmp, uint32_t[4]
    C %fp -  40: wtxt, uint32_t[4]
    C %fp - 136: OS register save area. 
    C
    C FRAME_SIZE is the amount the save instruction in the prologue
    C subtracts from %sp.
    C NOTE(review): no instruction visible in this file addresses tmp or
    C wtxt through %fp; check the AES_* macros before shrinking the frame.
    define(<FRAME_SIZE>, 136)
    
    	.file "aes-encrypt-internal.asm"
    
    	C _aes_encrypt(struct aes_context *ctx, 
    	C	       const struct aes_table *T,
    	C	       unsigned length, uint8_t *dst,
    	C	       uint8_t *src)
    	C
    	C Processes the input 16 bytes at a time: each pass of the block
    	C loop advances SRC and DST by 16 and decrements LENGTH by 16.
    	C A LENGTH of zero returns immediately. The block loop exits only
    	C when LENGTH reaches exactly zero, so callers are expected to pass
    	C a multiple of 16.
    	C
    	C The round count is read from the context at offset AES_NROUNDS
    	C and the subkeys start at offset AES_KEYS.
    	C
    	C SPARC branches are delayed: the instruction that follows each
    	C be, bne and ret below sits in the delay slot and is executed
    	C whether or not the branch is taken. Do not reorder them.
    
    	.section	".text"
    	.align 16
    	.proc	020
    	
    PROLOGUE(_nettle_aes_encrypt)
    
    	C New register window and FRAME_SIZE bytes of stack.
    	save	%sp, -FRAME_SIZE, %sp
    	cmp	LENGTH, 0
    	be	.Lend
    
    	C	Loop invariants
    	C	The first add is the delay slot of the be above, so it is also
    	C	executed on the early-exit path, where its result is unused.
    	add	T, AES_TABLE0, T0
    	add	T, AES_TABLE1, T1
    	add	T, AES_TABLE2, T2
    	add	T, AES_TABLE3, T3
    
    .Lblock_loop:
    	C  Read src, and add initial subkey
    	C  KEY is reset to the first subkey for every block.
    	add	CTX, AES_KEYS, KEY
    	AES_LOAD(0, SRC, KEY, W0)
    	AES_LOAD(1, SRC, KEY, W1)
    	AES_LOAD(2, SRC, KEY, W2)
    	AES_LOAD(3, SRC, KEY, W3)
    
    	C	Must be even, and includes the final round
    	ld	[AES_NROUNDS + CTX], ROUND
    	C	Step past the consumed input block and the initial subkey.
    	add	SRC, 16, SRC
    	add	KEY, 16, KEY
    
    	C	Each pass of the round loop performs two rounds, so halve
    	C	the count.
    	srl	ROUND, 1, ROUND
    	C	Last two rounds handled specially
    	sub	ROUND, 1, ROUND
    .Lround_loop:
    	C The AES_ROUND macro uses T0,... T3
    	C NOTE(review): the first macro argument (0-7) presumably selects
    	C the subkey word relative to KEY; confirm against the macro
    	C definition.
    	C	Transform W -> X
    	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
    	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
    	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
    	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
    
    	C	Transform X -> W
    	AES_ROUND(4, X0, X1, X2, X3, KEY, W0)
    	AES_ROUND(5, X1, X2, X3, X0, KEY, W1)
    	AES_ROUND(6, X2, X3, X0, X1, KEY, W2)
    	AES_ROUND(7, X3, X0, X1, X2, KEY, W3)
    
    	subcc	ROUND, 1, ROUND
    	bne	.Lround_loop
    	C	Delay slot: KEY advances by two subkeys (32 bytes) on every
    	C	pass, including the last one that falls through.
    	add	KEY, 32, KEY
    
    	C	Penultimate round
    	AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
    	AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
    	AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
    	AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
    
    	add	KEY, 16, KEY
    	C	Final round
    	C	Takes the table base T rather than T0-T3, and DST.
    	C	NOTE(review): presumably stores the output words at DST;
    	C	confirm against the macro definition.
    	AES_FINAL_ROUND(0, T, X0, X1, X2, X3, KEY, DST)
    	AES_FINAL_ROUND(1, T, X1, X2, X3, X0, KEY, DST)
    	AES_FINAL_ROUND(2, T, X2, X3, X0, X1, KEY, DST)
    	AES_FINAL_ROUND(3, T, X3, X0, X1, X2, KEY, DST)
    
    	C	One block done; loop until LENGTH is exactly zero.
    	subcc	LENGTH, 16, LENGTH
    	bne	.Lblock_loop
    	C	Delay slot: DST advances to the next output block.
    	add	DST, 16, DST
    
    .Lend:
    	C	restore sits in the delay slot of ret and undoes the save
    	C	from the prologue.
    	ret
    	restore
    EPILOGUE(_nettle_aes_encrypt)
    
    C Some stats from adriana.lysator.liu.se (SS1000$, 85 MHz), for AES 128
    
    C 1:	nettle-1.13 C-code
    C 2:	nettle-1.13 assembler
    C 3:	New C-code
    C 4:	New assembler, first correct version
    C 5:	New assembler, with basic scheduling of AES_ROUND.
    C 6:	New assembler, with loop invariants T0-T3.
    C 7:	New assembler, with basic scheduling also of AES_FINAL_ROUND.
    	
    C	MB/s	cycles/block	Code size (bytes)
    C 1	1.2	1107		592
    C 2	2.3	572		1032
    C 3	2.1	627
    C 4	1.8	722
    C 5	2.6	496
    C 6	3.0	437
    C 7	3.1	415		1448