Commit 08148381 authored by Niels Möller's avatar Niels Möller

Switch to non-logarithmic ALIGN macro.

parent e1646357
2013-04-16 Niels Möller <nisse@lysator.liu.se>
* asm.m4 (m4_log2): New macro, similar to the one in gmp.
(ALIGN): Changed to take alignment in bytes. Updated all callers,
currently used only in x86 and x86_64 files.
* umac.h (umac32_ctx, umac64_ctx, umac96_ctx, umac128_ctx): Make
block count a uint64_t. Reorder some elements to put short values
together.
......
......@@ -26,16 +26,17 @@ define(<EPILOGUE>,
<ifelse(ELF_STYLE,yes,
<.size C_NAME($1), . - C_NAME($1)>,<>)>)
dnl Argument to ALIGN is always logarithmic
dnl FIXME: the << operator is not supported by Solaris m4,
dnl and ** is not supported by OpenBSD m4.
dnl We should switch to non-logarithmic ALIGN instead.
dnl m4_log2(n): expand to the base-2 logarithm of n, for n a power of
dnl two. Used to convert a byte alignment (the new ALIGN convention)
dnl into the logarithmic argument expected by .align on some platforms.
dnl Delegates to m4_log2_internal with initial power = 1 (2^0) and
dnl initial exponent = 0.
define(<m4_log2>, <m4_log2_internal($1,1,0)>)
dnl m4_log2_internal(n, power, exponent): recursive helper for m4_log2.
dnl If power equals n, expand to exponent; otherwise recurse with power
dnl doubled and exponent incremented. Gives up once exponent reaches 10
dnl (i.e. n exceeds 512 or is not a power of two) and expands to the
dnl quoted error token <not-a-power-of-two>, which will then provoke an
dnl assembler error at the point of use.
define(<m4_log2_internal>,
<ifelse($3, 10, <not-a-power-of-two>,
$1, $2, $3,
<m4_log2_internal($1, eval(2*$2), eval(1 + $3))>)>)
dnl Argument to ALIGN is always in bytes, and converted to a
dnl logarithmic .align if necessary.
dnl Need changequote to be able to use the << operator.
define(<ALIGN>,
<changequote([,])dnl
.align ifelse(ALIGN_LOG,yes,$1,eval(1 << $1))dnl >> balance
changequote(<,>)dnl
<.align ifelse(ALIGN_LOG,yes,<m4_log2($1)>,$1)
>)
dnl Struct defining macros
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
......@@ -61,7 +60,7 @@ C %edi is a temporary, often used as an accumulator.
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_aes_decrypt)
C save all registers that need to be saved
pushl %ebx C 20(%esp)
......@@ -94,7 +93,7 @@ PROLOGUE(_nettle_aes_decrypt)
addl $16,KEY C point to next key
movl KEY,FRAME_KEY
ALIGN(4)
ALIGN(16)
.Lround_loop:
AES_ROUND(T, SA,SD,SC,SB, TMP, KEY)
movl TMP, TA
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
......@@ -61,7 +60,7 @@ C %edi is a temporary, often used as an accumulator.
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_aes_encrypt)
C save all registers that need to be saved
pushl %ebx C 20(%esp)
......@@ -94,7 +93,7 @@ PROLOGUE(_nettle_aes_encrypt)
addl $16,KEY C point to next key
movl KEY,FRAME_KEY
ALIGN(4)
ALIGN(16)
.Lround_loop:
AES_ROUND(T, SA,SB,SC,SD, TMP, KEY)
movl TMP, TA
......
......@@ -23,7 +23,7 @@ C MA 02111-1301, USA.
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_arcfour_crypt)
C save all registers that need to be saved
pushl %ebx C 12(%esp)
......@@ -63,7 +63,7 @@ C Register usage:
sarl $1, %edx
jc .Lloop_odd
ALIGN(4)
ALIGN(16)
.Lloop:
movb (%ebp, %eax), %cl C si.
addb %cl, %bl
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2010, Niels Möller
......@@ -142,7 +141,7 @@ define(<FLINV>, <
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
C save all registers that need to be saved
pushl %ebx C 32(%esp)
......
......@@ -68,7 +68,7 @@ define(<ROUND>,<
C _nettle_md5_compress(uint32_t *state, uint8_t *data)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_md5_compress)
C save all registers that need to be saved
......
......@@ -160,7 +160,7 @@ PROLOGUE(_nettle_sha1_compress)
C Loop-mixed to 520 cycles (for the complete function call) on
C AMD K7.
ALIGN(5)
ALIGN(32)
mov 88(%esp), T2
mov OFFSET(2)(T2), %ecx
mov OFFSET(0)(T2), %eax
......
......@@ -53,7 +53,7 @@ define(<TMP>,<%rbp>)
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_aes_decrypt)
W64_ENTRY(5, 0)
test PARAM_LENGTH, PARAM_LENGTH
......@@ -81,7 +81,7 @@ PROLOGUE(_nettle_aes_decrypt)
subl $1, COUNT
add $16,KEY C point to next key
ALIGN(4)
ALIGN(16)
.Lround_loop:
AES_ROUND(TABLE, SA,SD,SC,SB, TA, TMP)
AES_ROUND(TABLE, SB,SA,SD,SC, TB, TMP)
......
......@@ -53,7 +53,7 @@ define(<TMP>,<%rbp>)
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_aes_encrypt)
W64_ENTRY(5, 0)
test PARAM_LENGTH, PARAM_LENGTH
......@@ -81,7 +81,7 @@ PROLOGUE(_nettle_aes_encrypt)
subl $1, COUNT
add $16,KEY C point to next key
ALIGN(4)
ALIGN(16)
.Lround_loop:
AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP)
AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMP)
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2010, Niels Möller
......@@ -122,7 +121,7 @@ C xorl XREG(TMP), XREG($1)
C unsigned length, uint8_t *dst,
C uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
W64_ENTRY(5, 0)
......
......@@ -30,7 +30,7 @@ define(<C2>, <%r11>)
C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_ecc_192_modp)
W64_ENTRY(2, 0)
mov 16(RP), T2
......
C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2010, Niels Möller
......@@ -37,7 +36,7 @@ define(<USE_SSE2>, <no>)
C memxor(uint8_t *dst, const uint8_t *src, size_t n)
C %rdi %rsi %rdx
ALIGN(4)
ALIGN(16)
PROLOGUE(memxor)
W64_ENTRY(3, 0)
......@@ -48,7 +47,7 @@ EPILOGUE(memxor)
C memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
C %rdi %rsi %rdx %rcx
ALIGN(4)
ALIGN(16)
PROLOGUE(memxor3)
W64_ENTRY(4, 0)
......@@ -124,7 +123,7 @@ ifelse(USE_SSE2, yes, <
jz .Ldone
jmp .Lshift_next
ALIGN(4)
ALIGN(16)
.Lshift_loop:
mov 8(AP, N), S0
......@@ -177,7 +176,7 @@ C jz .Ldone
jmp .Lword_next
ALIGN(4)
ALIGN(16)
.Lword_loop:
mov 8(AP, N), TMP
......@@ -234,7 +233,7 @@ ifelse(USE_SSE2, yes, <
mov TMP, (DST, N)
jmp .Lsse2_next
ALIGN(4)
ALIGN(16)
.Lsse2_loop:
movdqu (AP, N), %xmm0
movdqu (BP, N), %xmm1
......
......@@ -34,7 +34,7 @@ include_src(<x86_64/salsa20.m4>)
C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_salsa20_core)
W64_ENTRY(3, 9)
......@@ -58,7 +58,7 @@ PROLOGUE(_nettle_salsa20_core)
shrl $1, XREG(COUNT)
ALIGN(4)
ALIGN(16)
.Loop:
QROUND(X0, X1, X2, X3)
pshufd $0x93, X1, X1 C 11 00 01 10 (least sign. left)
......
......@@ -50,7 +50,7 @@ C registers.
C salsa20_crypt(struct salsa20_ctx *ctx, unsigned length,
C uint8_t *dst, const uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_salsa20_crypt)
W64_ENTRY(4, 9)
......@@ -92,7 +92,7 @@ PROLOGUE(nettle_salsa20_crypt)
SWAP(X0, X2, M0011)
movl $10, XREG(COUNT)
ALIGN(4)
ALIGN(16)
.Loop:
QROUND(X0, X1, X2, X3)
C For the row operations, we first rotate the rows, to get
......
......@@ -522,7 +522,7 @@ define(<WLTI>, <
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_serpent_decrypt)
C save all registers that need to be saved
W64_ENTRY(4, 13)
......@@ -557,7 +557,7 @@ PROLOGUE(nettle_serpent_decrypt)
jmp .Lwround_start
ALIGN(4)
ALIGN(16)
.Lwround_loop:
WLTI(X0,X1,X2,X3)
......@@ -624,7 +624,7 @@ PROLOGUE(nettle_serpent_decrypt)
mov $384, CNT
jmp .Lround_start
ALIGN(4)
ALIGN(16)
.Lround_loop:
LTI(x0,x1,x2,x3)
.Lround_start:
......
......@@ -549,7 +549,7 @@ define(<WLT>, <
C unsigned length, uint8_t *dst,
C const uint8_t *src)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_serpent_encrypt)
C save all registers that need to be saved
W64_ENTRY(4, 13)
......@@ -583,7 +583,7 @@ PROLOGUE(nettle_serpent_encrypt)
mov $-512, CNT
jmp .Lwround_start
ALIGN(4)
ALIGN(16)
.Lwround_loop:
WLT(X0,X1,X2,X3)
.Lwround_start:
......@@ -653,7 +653,7 @@ C parallell.
mov $-512, CNT
jmp .Lround_start
ALIGN(4)
ALIGN(16)
.Lround_loop:
LT(x0,x1,x2,x3)
.Lround_start:
......
......@@ -123,7 +123,7 @@ C adding, and then rotating back.
C _nettle_sha1_compress(uint32_t *state, uint8_t *input)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_sha1_compress)
C save all registers that need to be saved
W64_ENTRY(2, 0)
......
......@@ -114,7 +114,7 @@ define(<NOEXPN>, <
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_sha256_compress)
W64_ENTRY(3, 0)
......@@ -137,7 +137,7 @@ PROLOGUE(_nettle_sha256_compress)
movl 24(STATE), SG
movl 28(STATE), SH
xor COUNT, COUNT
ALIGN(4)
ALIGN(16)
.Loop1:
NOEXPN(0) ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
......
......@@ -94,7 +94,7 @@ define(<ROTL64>, <
C sha3_permute(struct sha3_state *ctx)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(nettle_sha3_permute)
W64_ENTRY(1, 16)
push %rbp
......@@ -139,7 +139,7 @@ PROLOGUE(nettle_sha3_permute)
pxor A2122, C12
pxor A2324, C34
ALIGN(4)
ALIGN(16)
.Loop:
C The theta step. Combine parity bits, then xor to state.
C D0 = C4 ^ (C1 <<< 1)
......@@ -483,7 +483,7 @@ PROLOGUE(nettle_sha3_permute)
EPILOGUE(nettle_sha3_permute)
ALIGN(4)
ALIGN(16)
.rc: C In reverse order
.quad 0x8000000080008008
.quad 0x0000000080000001
......
......@@ -114,7 +114,7 @@ define(<NOEXPN>, <
C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_sha512_compress)
W64_ENTRY(3, 0)
......@@ -137,7 +137,7 @@ PROLOGUE(_nettle_sha512_compress)
mov 48(STATE), SG
mov 56(STATE), SH
xor COUNT, COUNT
ALIGN(4)
ALIGN(16)
.Loop1:
NOEXPN(0) ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
......
......@@ -49,7 +49,7 @@ C aligned.
C umac_nh_n(uint64_t *out, unsigned n, const uint32_t *key,
C unsigned length, const uint8_t *msg)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_umac_nh_n)
W64_ENTRY(5, 14)
pxor XY0, XY0
......
......@@ -36,7 +36,7 @@ C aligned.
C umac_nh(const uint32_t *key, unsigned length, const uint8_t *msg)
.text
ALIGN(4)
ALIGN(16)
PROLOGUE(_nettle_umac_nh)
W64_ENTRY(3, 7)
pxor XY, XY
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment