From 081483815557570dee6009ae46d66ed43a42c407 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Tue, 16 Apr 2013 15:38:15 +0200
Subject: [PATCH] Switch to non-logarithmic ALIGN macro.

---
 ChangeLog                          |  4 ++++
 asm.m4                             | 17 +++++++++--------
 x86/aes-decrypt-internal.asm       |  5 ++---
 x86/aes-encrypt-internal.asm       |  5 ++---
 x86/arcfour-crypt.asm              |  4 ++--
 x86/camellia-crypt-internal.asm    |  3 +--
 x86/md5-compress.asm               |  2 +-
 x86/sha1-compress.asm              |  2 +-
 x86_64/aes-decrypt-internal.asm    |  4 ++--
 x86_64/aes-encrypt-internal.asm    |  4 ++--
 x86_64/camellia-crypt-internal.asm |  3 +--
 x86_64/ecc-192-modp.asm            |  2 +-
 x86_64/memxor.asm                  | 11 +++++------
 x86_64/salsa20-core-internal.asm   |  4 ++--
 x86_64/salsa20-crypt.asm           |  4 ++--
 x86_64/serpent-decrypt.asm         |  6 +++---
 x86_64/serpent-encrypt.asm         |  6 +++---
 x86_64/sha1-compress.asm           |  2 +-
 x86_64/sha256-compress.asm         |  4 ++--
 x86_64/sha3-permute.asm            |  6 +++---
 x86_64/sha512-compress.asm         |  4 ++--
 x86_64/umac-nh-n.asm               |  2 +-
 x86_64/umac-nh.asm                 |  2 +-
 23 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 20726f1a..3ee05f6f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2013-04-16  Niels Möller  <nisse@lysator.liu.se>
 
+	* asm.m4 (m4_log2): New macro, similar to the one in gmp.
+	(ALIGN): Changed to take alignment in bytes. Updated all callers,
+	currently used only in x86 and x86_64 files.
+
 	* umac.h (umac32_ctx, umac64_ctx, umac96_ctx, umac128_ctx): Make
 	block count an uint64_t. Reorder some elements to put short
 	values together.
diff --git a/asm.m4 b/asm.m4
index f24442da..200b1361 100644
--- a/asm.m4
+++ b/asm.m4
@@ -26,16 +26,17 @@ define(<EPILOGUE>,
 <ifelse(ELF_STYLE,yes,
 <.size C_NAME($1), . - C_NAME($1)>,<>)>)
 
-dnl Argument to ALIGN is always logarithmic
-dnl FIXME: the << operator is not supported by Solaris m4,
-dnl and ** is not supported by OpenBSD m4.
-dnl We should switch to non-logarithmic ALIGN instead.
+define(<m4_log2>, <m4_log2_internal($1,1,0)>)
+define(<m4_log2_internal>,
+<ifelse($3, 10, <not-a-power-of-two>,
+$1, $2, $3,
+<m4_log2_internal($1, eval(2*$2), eval(1 + $3))>)>)
+
+dnl Argument to ALIGN is always in bytes, and converted to a
+dnl logarithmic .align if necessary.
 
-dnl Need changequote to be able to use the << operator.
 define(<ALIGN>,
-<changequote([,])dnl
-.align ifelse(ALIGN_LOG,yes,$1,eval(1 << $1))dnl >> balance
-changequote(<,>)dnl
+<.align ifelse(ALIGN_LOG,yes,<m4_log2($1)>,$1)
 >)
 
 dnl Struct defining macros
diff --git a/x86/aes-decrypt-internal.asm b/x86/aes-decrypt-internal.asm
index c19853ff..64e59283 100644
--- a/x86/aes-decrypt-internal.asm
+++ b/x86/aes-decrypt-internal.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
@@ -61,7 +60,7 @@ C %edi is a temporary, often used as an accumulator.
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_aes_decrypt)
 	C save all registers that need to be saved
 	pushl	%ebx		C  20(%esp)
@@ -94,7 +93,7 @@ PROLOGUE(_nettle_aes_decrypt)
 	addl	$16,KEY	C  point to next key
 	movl	KEY,FRAME_KEY
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	AES_ROUND(T, SA,SD,SC,SB, TMP, KEY)
 	movl	TMP, TA
diff --git a/x86/aes-encrypt-internal.asm b/x86/aes-encrypt-internal.asm
index fc7d2c40..9fe32fc5 100644
--- a/x86/aes-encrypt-internal.asm
+++ b/x86/aes-encrypt-internal.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2001, 2002, 2005 Rafael R. Sevilla, Niels Möller
@@ -61,7 +60,7 @@ C %edi is a temporary, often used as an accumulator.
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_aes_encrypt)
 	C save all registers that need to be saved
 	pushl	%ebx		C  20(%esp)
@@ -94,7 +93,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	addl	$16,KEY	C  point to next key
 	movl	KEY,FRAME_KEY
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	AES_ROUND(T, SA,SB,SC,SD, TMP, KEY)
 	movl	TMP, TA
diff --git a/x86/arcfour-crypt.asm b/x86/arcfour-crypt.asm
index 842ae4a5..89ee7c9e 100644
--- a/x86/arcfour-crypt.asm
+++ b/x86/arcfour-crypt.asm
@@ -23,7 +23,7 @@ C MA 02111-1301, USA.
 	C             unsigned length, uint8_t *dst,
 	C             const uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_arcfour_crypt)
 	C save all registers that need to be saved
 	pushl	%ebx		C  12(%esp)
@@ -63,7 +63,7 @@ C Register usage:
 	sarl	$1, %edx
 	jc	.Lloop_odd
 
-	ALIGN(4)
+	ALIGN(16)
 .Lloop:
 	movb	(%ebp, %eax), %cl	C  si.
 	addb	%cl, %bl
diff --git a/x86/camellia-crypt-internal.asm b/x86/camellia-crypt-internal.asm
index e8d892d3..7766220e 100644
--- a/x86/camellia-crypt-internal.asm
+++ b/x86/camellia-crypt-internal.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2010, Niels Möller
@@ -142,7 +141,7 @@ define(<FLINV>, <
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_camellia_crypt)
 	C save all registers that need to be saved
 	pushl	%ebx		C  32(%esp)
diff --git a/x86/md5-compress.asm b/x86/md5-compress.asm
index 1bdada79..ac0cd900 100644
--- a/x86/md5-compress.asm
+++ b/x86/md5-compress.asm
@@ -68,7 +68,7 @@ define(<ROUND>,<
 C _nettle_md5_compress(uint32_t *state, uint8_t *data)
 
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_md5_compress)
 
 	C save all registers that need to be saved
diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm
index afb8d8c5..777615dc 100644
--- a/x86/sha1-compress.asm
+++ b/x86/sha1-compress.asm
@@ -160,7 +160,7 @@ PROLOGUE(_nettle_sha1_compress)
 
 	C Loop-mixed to 520 cycles (for the complete function call) on
 	C AMD K7.
-ALIGN(5)
+ALIGN(32)
 	mov	88(%esp), T2
 	mov	OFFSET(2)(T2), %ecx
 	mov	OFFSET(0)(T2), %eax
diff --git a/x86_64/aes-decrypt-internal.asm b/x86_64/aes-decrypt-internal.asm
index de97de32..0d4f2f92 100644
--- a/x86_64/aes-decrypt-internal.asm
+++ b/x86_64/aes-decrypt-internal.asm
@@ -53,7 +53,7 @@ define(<TMP>,<%rbp>)
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_aes_decrypt)
 	W64_ENTRY(5, 0)
 	test	PARAM_LENGTH, PARAM_LENGTH
@@ -81,7 +81,7 @@ PROLOGUE(_nettle_aes_decrypt)
 	subl	$1, COUNT
 	add	$16,KEY	C  point to next key
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	AES_ROUND(TABLE, SA,SD,SC,SB, TA, TMP)
 	AES_ROUND(TABLE, SB,SA,SD,SC, TB, TMP)
diff --git a/x86_64/aes-encrypt-internal.asm b/x86_64/aes-encrypt-internal.asm
index fbfcdb66..4ae0ec85 100644
--- a/x86_64/aes-encrypt-internal.asm
+++ b/x86_64/aes-encrypt-internal.asm
@@ -53,7 +53,7 @@ define(<TMP>,<%rbp>)
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_aes_encrypt)
 	W64_ENTRY(5, 0)
 	test	PARAM_LENGTH, PARAM_LENGTH
@@ -81,7 +81,7 @@ PROLOGUE(_nettle_aes_encrypt)
 	subl	$1, COUNT
 	add	$16,KEY	C  point to next key
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	AES_ROUND(TABLE, SA,SB,SC,SD, TA, TMP)
 	AES_ROUND(TABLE, SB,SC,SD,SA, TB, TMP)
diff --git a/x86_64/camellia-crypt-internal.asm b/x86_64/camellia-crypt-internal.asm
index 8fc2fc1b..e44a3dec 100644
--- a/x86_64/camellia-crypt-internal.asm
+++ b/x86_64/camellia-crypt-internal.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2010, Niels Möller
@@ -122,7 +121,7 @@ C 	xorl	XREG(TMP), XREG($1)
 	C             unsigned length, uint8_t *dst,
 	C 	      uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_camellia_crypt)
 
 	W64_ENTRY(5, 0)
diff --git a/x86_64/ecc-192-modp.asm b/x86_64/ecc-192-modp.asm
index 5812070b..f3fe4958 100644
--- a/x86_64/ecc-192-modp.asm
+++ b/x86_64/ecc-192-modp.asm
@@ -30,7 +30,7 @@ define(<C2>, <%r11>)
 
 	C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_ecc_192_modp)
 	W64_ENTRY(2, 0)
 	mov	16(RP), T2
diff --git a/x86_64/memxor.asm b/x86_64/memxor.asm
index c2ccc920..b22a4721 100644
--- a/x86_64/memxor.asm
+++ b/x86_64/memxor.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2010, Niels Möller
@@ -37,7 +36,7 @@ define(<USE_SSE2>, <no>)
 
 	C memxor(uint8_t *dst, const uint8_t *src, size_t n)
 	C 	          %rdi               %rsi      %rdx
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(memxor)
 	W64_ENTRY(3, 0)
 
@@ -48,7 +47,7 @@ EPILOGUE(memxor)
 
 	C memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
 	C 	          %rdi              %rsi              %rdx      %rcx
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(memxor3)
 	W64_ENTRY(4, 0)
 
@@ -124,7 +123,7 @@ ifelse(USE_SSE2, yes, <
 	jz	.Ldone
 	jmp	.Lshift_next
 
-	ALIGN(4)
+	ALIGN(16)
 
 .Lshift_loop:
 	mov	8(AP, N), S0
@@ -177,7 +176,7 @@ C
 	jz	.Ldone
 	jmp	.Lword_next
 
-	ALIGN(4)
+	ALIGN(16)
 
 .Lword_loop:
 	mov	8(AP, N), TMP
@@ -234,7 +233,7 @@ ifelse(USE_SSE2, yes, <
 	mov	TMP, (DST, N)
 	jmp	.Lsse2_next
 
-	ALIGN(4)
+	ALIGN(16)
 .Lsse2_loop:
 	movdqu	(AP, N), %xmm0
 	movdqu	(BP, N), %xmm1
diff --git a/x86_64/salsa20-core-internal.asm b/x86_64/salsa20-core-internal.asm
index 81ca2cc8..0e0cdf6a 100644
--- a/x86_64/salsa20-core-internal.asm
+++ b/x86_64/salsa20-core-internal.asm
@@ -34,7 +34,7 @@ include_src(<x86_64/salsa20.m4>)
 
 	C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_salsa20_core)
 	W64_ENTRY(3, 9)
 
@@ -58,7 +58,7 @@ PROLOGUE(_nettle_salsa20_core)
 
 	shrl	$1, XREG(COUNT)
 
-	ALIGN(4)
+	ALIGN(16)
 .Loop:
 	QROUND(X0, X1, X2, X3)
 	pshufd	$0x93, X1, X1	C 11 00 01 10 (least sign. left)
diff --git a/x86_64/salsa20-crypt.asm b/x86_64/salsa20-crypt.asm
index 5d119804..25b7e497 100644
--- a/x86_64/salsa20-crypt.asm
+++ b/x86_64/salsa20-crypt.asm
@@ -50,7 +50,7 @@ C registers.
 	C salsa20_crypt(struct salsa20_ctx *ctx, unsigned length,
 	C 		uint8_t *dst, const uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_salsa20_crypt)
 	W64_ENTRY(4, 9)
 
@@ -92,7 +92,7 @@ PROLOGUE(nettle_salsa20_crypt)
 	SWAP(X0, X2, M0011)
 
 	movl	$10, XREG(COUNT)
-	ALIGN(4)
+	ALIGN(16)
 .Loop:
 	QROUND(X0, X1, X2, X3)
 	C For the row operations, we first rotate the rows, to get
diff --git a/x86_64/serpent-decrypt.asm b/x86_64/serpent-decrypt.asm
index 02a857ce..d6bacb5d 100644
--- a/x86_64/serpent-decrypt.asm
+++ b/x86_64/serpent-decrypt.asm
@@ -522,7 +522,7 @@ define(<WLTI>, <
 	C             unsigned length, uint8_t *dst,
 	C             const uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_serpent_decrypt)
 	C save all registers that need to be saved
 	W64_ENTRY(4, 13)
@@ -557,7 +557,7 @@ PROLOGUE(nettle_serpent_decrypt)
 
 	jmp	.Lwround_start
 
-	ALIGN(4)
+	ALIGN(16)
 
 .Lwround_loop:
 	WLTI(X0,X1,X2,X3)
@@ -624,7 +624,7 @@ PROLOGUE(nettle_serpent_decrypt)
 	mov	$384, CNT
 	jmp	.Lround_start
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	LTI(x0,x1,x2,x3)
 .Lround_start:
diff --git a/x86_64/serpent-encrypt.asm b/x86_64/serpent-encrypt.asm
index 5362bad1..613ef41e 100644
--- a/x86_64/serpent-encrypt.asm
+++ b/x86_64/serpent-encrypt.asm
@@ -549,7 +549,7 @@ define(<WLT>, <
 	C             unsigned length, uint8_t *dst,
 	C             const uint8_t *src)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_serpent_encrypt)
 	C save all registers that need to be saved
 	W64_ENTRY(4, 13)
@@ -583,7 +583,7 @@ PROLOGUE(nettle_serpent_encrypt)
 	mov	$-512, CNT
 	jmp	.Lwround_start
 
-	ALIGN(4)
+	ALIGN(16)
 .Lwround_loop:
 	WLT(X0,X1,X2,X3)
 .Lwround_start:
@@ -653,7 +653,7 @@ C parallell.
 	mov	$-512, CNT
 	jmp	.Lround_start
 
-	ALIGN(4)
+	ALIGN(16)
 .Lround_loop:
 	LT(x0,x1,x2,x3)
 .Lround_start:
diff --git a/x86_64/sha1-compress.asm b/x86_64/sha1-compress.asm
index ffa28d0b..5155683c 100644
--- a/x86_64/sha1-compress.asm
+++ b/x86_64/sha1-compress.asm
@@ -123,7 +123,7 @@ C adding, and then rotating back.
 C _nettle_sha1_compress(uint32_t *state, uint8_t *input)
 
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_sha1_compress)
 	C save all registers that need to be saved
 	W64_ENTRY(2, 0)
diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress.asm
index 59f922e7..6bfb7a78 100644
--- a/x86_64/sha256-compress.asm
+++ b/x86_64/sha256-compress.asm
@@ -114,7 +114,7 @@ define(<NOEXPN>, <
 
 C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_sha256_compress)
 	W64_ENTRY(3, 0)
 
@@ -137,7 +137,7 @@ PROLOGUE(_nettle_sha256_compress)
 	movl	24(STATE), SG
 	movl	28(STATE), SH
 	xor	COUNT, COUNT
-	ALIGN(4)
+	ALIGN(16)
 .Loop1:
 	NOEXPN(0)
 	ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
diff --git a/x86_64/sha3-permute.asm b/x86_64/sha3-permute.asm
index 64c4cfc4..7f9a6b79 100644
--- a/x86_64/sha3-permute.asm
+++ b/x86_64/sha3-permute.asm
@@ -94,7 +94,7 @@ define(<ROTL64>, <
 
 	C sha3_permute(struct sha3_state *ctx)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(nettle_sha3_permute)
 	W64_ENTRY(1, 16)
 	push	%rbp
@@ -139,7 +139,7 @@ PROLOGUE(nettle_sha3_permute)
 	pxor	A2122, C12
 	pxor	A2324, C34
 
-	ALIGN(4)
+	ALIGN(16)
 .Loop:
 	C The theta step. Combine parity bits, then xor to state.
 	C D0 = C4 ^ (C1 <<< 1)
@@ -483,7 +483,7 @@ PROLOGUE(nettle_sha3_permute)
 
 EPILOGUE(nettle_sha3_permute)
 
-ALIGN(4)
+ALIGN(16)
 .rc:	C In reverse order
 	.quad	0x8000000080008008
 	.quad	0x0000000080000001
diff --git a/x86_64/sha512-compress.asm b/x86_64/sha512-compress.asm
index d54ebda3..21df82a2 100644
--- a/x86_64/sha512-compress.asm
+++ b/x86_64/sha512-compress.asm
@@ -114,7 +114,7 @@ define(<NOEXPN>, <
 
 C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_sha512_compress)
 	W64_ENTRY(3, 0)
 
@@ -137,7 +137,7 @@ PROLOGUE(_nettle_sha512_compress)
 	mov	48(STATE), SG
 	mov	56(STATE), SH
 	xor	COUNT, COUNT
-	ALIGN(4)
+	ALIGN(16)
 .Loop1:
 	NOEXPN(0)
 	ROUND(SA,SB,SC,SD,SE,SF,SG,SH,0)
diff --git a/x86_64/umac-nh-n.asm b/x86_64/umac-nh-n.asm
index 06e74269..bcb99487 100644
--- a/x86_64/umac-nh-n.asm
+++ b/x86_64/umac-nh-n.asm
@@ -49,7 +49,7 @@ C aligned.
 	C umac_nh_n(uint64_t *out, unsigned n, const uint32_t *key,
 	C 	    unsigned length, const uint8_t *msg)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_umac_nh_n)
 	W64_ENTRY(5, 14)
 	pxor	XY0, XY0
diff --git a/x86_64/umac-nh.asm b/x86_64/umac-nh.asm
index f9230cd8..8e88df6a 100644
--- a/x86_64/umac-nh.asm
+++ b/x86_64/umac-nh.asm
@@ -36,7 +36,7 @@ C aligned.
 
 	C umac_nh(const uint32_t *key, unsigned length, const uint8_t *msg)
 	.text
-	ALIGN(4)
+	ALIGN(16)
 PROLOGUE(_nettle_umac_nh)
 	W64_ENTRY(3, 7)
 	pxor	XY, XY
-- 
GitLab
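
Not part of the patch: a minimal standalone sketch of what the new m4_log2/ALIGN
pair in asm.m4 does, written with m4's default `quote' characters (nettle's
asm.m4 installs < and > as the quote characters instead). ALIGN now takes the
alignment in bytes; when ALIGN_LOG is yes (for assemblers whose .align argument
is logarithmic), the byte count is converted with m4_log2, so ALIGN(16) becomes
.align 4, and otherwise it is emitted as .align 16.

define(`m4_log2', `m4_log2_internal($1,1,0)')
define(`m4_log2_internal',
`ifelse($3, 10, `not-a-power-of-two',
$1, $2, $3,
`m4_log2_internal($1, eval(2*$2), eval(1 + $3))')')
dnl ALIGN takes the alignment in bytes and emits a .align directive,
dnl converting to the logarithmic form when ALIGN_LOG is yes.
define(`ALIGN', `.align ifelse(ALIGN_LOG,yes,`m4_log2($1)',$1)')
dnl With ALIGN_LOG unset, this call expands to ".align 16":
ALIGN(16)
dnl After defining ALIGN_LOG to yes, the same call expands to ".align 4":
define(`ALIGN_LOG', `yes')
ALIGN(16)

Running plain m4 over those lines prints .align 16 and then .align 4 (plus the
empty lines left behind by the define calls). m4_log2 expands to the text
not-a-power-of-two when its argument is not one of 1, 2, 4, ..., 512, which
turns a bad alignment into an assembler error rather than a silent mistake.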