From 5d6be1bc102de591c56e673853de68eedf9df683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Mon, 20 Jan 2014 14:14:40 +0100 Subject: [PATCH] Move block buffer from poly1305_ctx to poly1305_aes_ctx. Simplify poly1305_digest. --- ChangeLog | 26 ++++++++ Makefile.in | 2 +- asm.m4 | 2 - poly1305-aes.c | 31 ++++++++-- poly1305-internal.c | 35 ++++------- poly1305.c | 37 ----------- poly1305.h | 30 ++++----- x86_64/poly1305-internal.asm | 115 +++++++++-------------------------- 8 files changed, 111 insertions(+), 167 deletions(-) delete mode 100644 poly1305.c diff --git a/ChangeLog b/ChangeLog index 7de86fe4..57fff080 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2014-01-20 Niels Möller <nisse@lysator.liu.se> + + * x86_64/poly1305-internal.asm: Update to new interface. + poly1305_digest much simplified. + + * poly1305.h (struct poly1305_ctx): Moved block and index + fields... + (struct poly1305_aes_ctx): ... to here. + * asm.m4: Delete also from the assembly definition of struct + poly1305_ctx. + + * poly1305-internal.c (poly1305_digest): Don't do final padding + here, leave that to caller. Add digest to the provided nonce s, + and deleted length and dst arguments. Also reset h0-h4 to zero + when done. + (_poly1305_block): Renamed, from... + (poly1305_block): ...old name. + + * poly1305-aes.c (poly1305_aes_update): New function. + (poly1305_aes_digest): Update for poly1305_digest changes, do + final padding here. + + * poly1305.c (poly1305_update): Deleted file and function. Moved + to poly1305-aes.c. + * Makefile.in (nettle_SOURCES): Deleted poly1305.c. + 2014-01-17 Niels Möller <nisse@lysator.liu.se> * poly1305-internal.c (poly1305_block): Additional argument with diff --git a/Makefile.in b/Makefile.in index d6cd848c..c77326b2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,7 +103,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \ serpent-set-key.c serpent-encrypt.c serpent-decrypt.c \ serpent-meta.c \ twofish.c twofish-meta.c \ - poly1305-aes.c poly1305.c poly1305-internal.c \ + poly1305-aes.c poly1305-internal.c \ umac-nh.c umac-nh-n.c umac-l2.c umac-l3.c \ umac-poly64.c umac-poly128.c umac-set-key.c \ umac32.c umac64.c umac96.c umac128.c \ diff --git a/asm.m4 b/asm.m4 index a6ea52cc..55da2bfb 100644 --- a/asm.m4 +++ b/asm.m4 @@ -85,7 +85,5 @@ STRUCTURE(P1305) STRUCT(H2, 4) STRUCT(H0, 8) STRUCT(H1, 8) - STRUCT(BLOCK, 16) - STRUCT(INDEX, 4) divert diff --git a/poly1305-aes.c b/poly1305-aes.c index 8a7d9d13..e4a6f748 100644 --- a/poly1305-aes.c +++ b/poly1305-aes.c @@ -23,6 +23,7 @@ #include "config.h" #endif +#include <assert.h> #include <string.h> #include "poly1305.h" @@ -33,7 +34,7 @@ poly1305_aes_set_key (struct poly1305_aes_ctx *ctx, const uint8_t * key) { aes128_set_encrypt_key(&ctx->aes, (key)); poly1305_set_key(&ctx->pctx, (key+16)); - ctx->pctx.index = 0; + ctx->index = 0; } void @@ -43,13 +44,35 @@ poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx, memcpy (ctx->nonce, nonce, POLY1305_AES_NONCE_SIZE); } +#define COMPRESS(ctx, data) _poly1305_block(&(ctx)->pctx, (data), 1) + +void +poly1305_aes_update (struct poly1305_aes_ctx *ctx, size_t length, const uint8_t *data) +{ + MD_UPDATE (ctx, length, data, COMPRESS, (void) 0); +} + void poly1305_aes_digest (struct poly1305_aes_ctx *ctx, - size_t length, uint8_t * digest) + size_t length, uint8_t *digest) { uint8_t s[POLY1305_BLOCK_SIZE]; + /* final bytes */ + if (ctx->index > 0) + { + assert (ctx->index < POLY1305_BLOCK_SIZE); + + ctx->block[ctx->index] = 1; + memset (ctx->block + ctx->index + 1, + 0, POLY1305_BLOCK_SIZE - 1 - ctx->index); + + _poly1305_block (&ctx->pctx, ctx->block, 0); + } aes128_encrypt(&ctx->aes, POLY1305_BLOCK_SIZE, s, ctx->nonce); - poly1305_digest (&ctx->pctx, length, digest, s); + + poly1305_digest (&ctx->pctx, s); + memcpy (digest, s, length); + INCREMENT (16, ctx->nonce); - ctx->pctx.index = 0; + ctx->index = 0; } diff --git a/poly1305-internal.c b/poly1305-internal.c index 62c6976c..b33a3c9d 100644 --- a/poly1305-internal.c +++ b/poly1305-internal.c @@ -86,7 +86,7 @@ poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16]) } void -poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4) +_poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4) { uint32_t t0,t1,t2,t3; uint32_t b; @@ -119,28 +119,13 @@ poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4) ctx->h0 += b * 5; } +/* Adds digest to the nonce */ void -poly1305_digest (struct poly1305_ctx *ctx, - size_t length, uint8_t *digest, - const uint8_t *s) +poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s) { uint32_t b, nb; uint64_t f0,f1,f2,f3; uint32_t g0,g1,g2,g3,g4; - uint8_t td[16]; - - /* final bytes */ - /* poly1305_donna_atmost15bytes: */ - if (ctx->index > 0) - { - assert (ctx->index < POLY1305_BLOCK_SIZE); - - ctx->block[ctx->index] = 1; - memset (ctx->block + ctx->index + 1, - 0, POLY1305_BLOCK_SIZE - 1 - ctx->index); - - poly1305_block (ctx, ctx->block, 0); - } b = ctx->h0 >> 26; ctx->h0 = ctx->h0 & 0x3ffffff; ctx->h1 += b; b = ctx->h1 >> 26; ctx->h1 = ctx->h1 & 0x3ffffff; @@ -169,13 +154,17 @@ poly1305_digest (struct poly1305_ctx *ctx, f2 = ((ctx->h2 >> 12) | (ctx->h3 << 14)) + (uint64_t)LE_READ_UINT32(s+8); f3 = ((ctx->h3 >> 18) | (ctx->h4 << 8)) + (uint64_t)LE_READ_UINT32(s+12); - LE_WRITE_UINT32(td, f0); + LE_WRITE_UINT32(s, f0); f1 += (f0 >> 32); - LE_WRITE_UINT32(&td[4], f1); + LE_WRITE_UINT32(s+4, f1); f2 += (f1 >> 32); - LE_WRITE_UINT32(&td[8], f2); + LE_WRITE_UINT32(s+8, f2); f3 += (f2 >> 32); - LE_WRITE_UINT32(&td[12], f3); + LE_WRITE_UINT32(s+12, f3); - memcpy(digest, td, length); + ctx->h0 = 0; + ctx->h1 = 0; + ctx->h2 = 0; + ctx->h3 = 0; + ctx->h4 = 0; } diff --git a/poly1305.c b/poly1305.c deleted file mode 100644 index 20f669f5..00000000 --- a/poly1305.c +++ /dev/null @@ -1,37 +0,0 @@ -/* nettle, low-level cryptographics library - * - * Copyright (C) 2013 Nikos Mavrogiannopoulos, Niels Möller - * - * The nettle library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or (at your - * option) any later version. - * - * The nettle library is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - * License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with the nettle library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02111-1301, USA. - */ - -#if HAVE_CONFIG_H -#include "config.h" -#endif - -#include <string.h> - -#include "poly1305.h" - -#include "macros.h" - -#define COMPRESS(ctx, data) poly1305_block((ctx), (data), 1) - -void -poly1305_update (struct poly1305_ctx *ctx, size_t length, const uint8_t *data) -{ - MD_UPDATE (ctx, length, data, COMPRESS, (void) 0); -} diff --git a/poly1305.h b/poly1305.h index 3517d9c3..359c8e32 100644 --- a/poly1305.h +++ b/poly1305.h @@ -35,12 +35,12 @@ extern "C" { /* Name mangling */ #define poly1305_set_key nettle_poly1305_set_key -#define poly1305_update nettle_poly1305_update -#define poly1305_block nettle_poly1305_block #define poly1305_digest nettle_poly1305_digest +#define _poly1305_block _nettle_poly1305_block #define poly1305_aes_set_key nettle_poly1305_aes_set_key #define poly1305_aes_set_nonce nettle_poly1305_aes_set_nonce +#define poly1305_aes_update nettle_poly1305_aes_update #define poly1305_aes_digest nettle_poly1305_aes_digest /* Low level functions/macros for the poly1305 construction. */ @@ -66,17 +66,15 @@ struct poly1305_ctx { uint32_t h32[4]; uint64_t h64[2]; } h; - - uint8_t block[POLY1305_BLOCK_SIZE]; - unsigned index; }; +/* Low-level internal interface. */ void poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[POLY1305_KEY_SIZE]); -void poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[POLY1305_BLOCK_SIZE], - unsigned high); -void poly1305_update (struct poly1305_ctx *ctx, size_t size, const uint8_t *data); -void poly1305_digest (struct poly1305_ctx *ctx, - size_t length, uint8_t *digest, const uint8_t *s); +/* Extracts digest, and adds it to s, the encrypted nonce. */ +void poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s); +/* Internal function. Process one block. */ +void _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[POLY1305_BLOCK_SIZE], + unsigned high); /* poly1305-aes */ @@ -86,8 +84,11 @@ void poly1305_digest (struct poly1305_ctx *ctx, struct poly1305_aes_ctx { - /* Must be first element, for the poly1305_aes_update cast to work. */ + /* Keep aes context last, to make it possible to use a general + poly1305_update if other variants are added. */ struct poly1305_ctx pctx; + uint8_t block[POLY1305_BLOCK_SIZE]; + unsigned index; uint8_t nonce[POLY1305_BLOCK_SIZE]; struct aes128_ctx aes; }; @@ -101,9 +102,10 @@ void poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx, const uint8_t *nonce); -/* An alias, nothing aes-specific. */ -#define poly1305_aes_update \ - (*(void(*)(struct poly1305_aes_ctx *, size_t, const uint8_t *))&poly1305_update) +/* Update is not aes-specific, but since this is the only implemented + variant, we need no more general poly1305_update. */ +void +poly1305_aes_update (struct poly1305_aes_ctx *ctx, size_t length, const uint8_t *data); /* Also increments the nonce */ void diff --git a/x86_64/poly1305-internal.asm b/x86_64/poly1305-internal.asm index 9b8ae013..453c62b2 100644 --- a/x86_64/poly1305-internal.asm +++ b/x86_64/poly1305-internal.asm @@ -52,7 +52,6 @@ PROLOGUE(nettle_poly1305_set_key) mov %rax, P1305_H0 (CTX) mov %rax, P1305_H1 (CTX) mov XREG(%rax), P1305_H2 (CTX) - mov XREG(%rax), P1305_INDEX (CTX) W64_EXIT(2,0) ret @@ -61,7 +60,7 @@ EPILOGUE(nettle_poly1305_set_key) C 64-bit multiplication mod 2^130 - 5 C -C (x_0 + B x_1 + B^2 x_1) * (r_0 + B r_1) = +C (x_0 + B x_1 + B^2 x_2) * (r_0 + B r_1) = C 1 B B^2 B^3 C x_0 r_0 C x_0 r_1 @@ -73,40 +72,47 @@ C Then r_1 B^2 = r_1/4 (2^130) = 5/4 r_1. C and r_1 B^3 = 5/4 B r_1 C So we get C -C x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0) +C x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0) +C 1 B B^2 B^3 +C x_0 r_0 +C x_1 r'_1 +C x_0 r_1 +C x_1 r_0 +C x_2 r'_1 +C x_2 r_0 - C poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi) + C _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi) -PROLOGUE(nettle_poly1305_block) +PROLOGUE(_nettle_poly1305_block) + W64_ENTRY(3, 0) mov (%rsi), T0 mov 8(%rsi), T1 mov XREG(%rdx), XREG(T2) -C FIXME: Support windows ABI + C Registers: C Inputs: CTX, T0, T1, T2, C Outputs: H0, H1, H2, stored into the context. -C_NAME(poly1305_block): add P1305_H0 (CTX), T0 adc P1305_H1 (CTX), T1 adc P1305_H2 (CTX), XREG(T2) mov P1305_R0 (CTX), %rax - mul T0 + mul T0 C x0*r0 mov %rax, H0 mov %rdx, H1 mov P1305_S1 (CTX), %rax C 5/4 r1 mov %rax, H2 - mul T1 - imul T2, H2 - imul P1305_R0 (CTX), T2 + mul T1 C x1*r1' + imul T2, H2 C x2*r1' + imul P1305_R0 (CTX), T2 C x2*r0 add %rax, H0 adc %rdx, H1 mov P1305_R0 (CTX), %rax - mul T1 + mul T1 C x1*r0 add %rax, H2 adc %rdx, T2 mov P1305_R1 (CTX), %rax - mul T0 + mul T0 C x0*r1 add %rax, H2 adc %rdx, T2 mov T2, %rax @@ -119,57 +125,17 @@ C_NAME(poly1305_block): mov H0, P1305_H0 (CTX) mov H1, P1305_H1 (CTX) mov XREG(T2), P1305_H2 (CTX) + W64_EXIT(3, 0) ret -EPILOGUE(nettle_poly1305_block) +EPILOGUE(_nettle_poly1305_block) - C poly1305_digest (struct poly1305_ctx *ctx, - C size_t length, uint8_t *digest, - C const uint8_t *s) + C poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s) C Registers: C %rdi: ctx - C %rsi: length - C %rdx: digest - C %rcx: s + C %rsi: s PROLOGUE(nettle_poly1305_digest) - W64_ENTRY(4, 0) - mov P1305_INDEX (CTX), XREG(%rax) - push %rsi - push %rdx - push %rcx - test XREG(%rax), XREG(%rax) - jz .Lfinal - - C Pad with a 1 byte. - C FIXME: Or in, without storing in memory. - inc XREG(%rax) C Also clears high half - movb $1, P1305_BLOCK-1 (CTX, %rax) - - mov XREG(%rax), XREG(%rcx) - mov $1, T1 - and $7, XREG(%rcx) - shl $3, XREG(%rcx) - shl LREG(%rcx), T1 - dec T1 - mov P1305_BLOCK (CTX), T0 - xor T2, T2 - cmp $8, XREG(%rax) - jc .Lfinal_lt8 - C If %rax == 16, we get T1 == 0, - C tweak so we get need T1 = -1 instead. - cmp $16, XREG(%rax) - adc $-1, T1 - and P1305_BLOCK+8 (CTX), T1 - jmp .Lfinal_block - -.Lfinal_lt8: - and T1, T0 - xor T1, T1 -.Lfinal_block: - - call poly1305_block - -.Lfinal: + W64_ENTRY(2, 0) mov P1305_H0 (CTX), H0 mov P1305_H1 (CTX), H1 @@ -182,6 +148,8 @@ PROLOGUE(nettle_poly1305_digest) adc $0, H1 adc $0, XREG(H2) +C Use %rax instead of %rsi +define(<T1>, <%rax>) C Add 5, use result if >= 2^130 mov $5, T0 xor T1, T1 @@ -192,38 +160,13 @@ PROLOGUE(nettle_poly1305_digest) cmovnc T0, H0 cmovnc T1, H1 - pop %rcx - pop %rdx - pop %rsi - - add (%rcx), H0 - adc 8(%rcx), H1 + add H0, (%rsi) + adc H1, 8(%rsi) - C Store, taking length into account - cmp $8, %rsi - jc .Ldigest_lt8 - mov H0, (%rdx) - jz .Ldigest_done - cmp $16, %rsi - jc .Ldigest_lt16 - mov H1, 8(%rdx) - jmp .Ldigest_done -.Ldigest_lt16: - mov H1, H0 - add $8, %rdx - sub $8, %rsi -.Ldigest_lt8: - movb LREG(H0), (%rdx) - shr $8, H0 - inc %rdx - dec %rsi - jnz .Ldigest_lt8 -.Ldigest_done: xor XREG(%rax), XREG(%rax) mov %rax, P1305_H0 (CTX) mov %rax, P1305_H1 (CTX) mov XREG(%rax), P1305_H2 (CTX) - mov XREG(%rax), P1305_INDEX (CTX) - W64_EXIT(4, 0) + W64_EXIT(2, 0) ret -- GitLab