From 5d6be1bc102de591c56e673853de68eedf9df683 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Mon, 20 Jan 2014 14:14:40 +0100
Subject: [PATCH] Move block buffer from poly1305_ctx to poly1305_aes_ctx.
 Simplify poly1305_digest.

---
 ChangeLog                    |  26 ++++++++
 Makefile.in                  |   2 +-
 asm.m4                       |   2 -
 poly1305-aes.c               |  31 ++++++++--
 poly1305-internal.c          |  35 ++++-------
 poly1305.c                   |  37 -----------
 poly1305.h                   |  30 ++++-----
 x86_64/poly1305-internal.asm | 115 +++++++++--------------------------
 8 files changed, 111 insertions(+), 167 deletions(-)
 delete mode 100644 poly1305.c

diff --git a/ChangeLog b/ChangeLog
index 7de86fe4..57fff080 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2014-01-20  Niels Möller  <nisse@lysator.liu.se>
+
+	* x86_64/poly1305-internal.asm: Update to new interface.
+	poly1305_digest much simplified.
+
+	* poly1305.h (struct poly1305_ctx): Moved block and index
+	fields...
+	(struct poly1305_aes_ctx): ... to here.
+	* asm.m4: Delete also from the assembly definition of struct
+	poly1305_ctx.
+
+	* poly1305-internal.c (poly1305_digest): Don't do final padding
+	here, leave that to caller. Add digest to the provided nonce s,
+	and delete the length and digest arguments. Also reset h0-h4 to zero
+	when done.
+	(_poly1305_block): Renamed, from...
+	(poly1305_block): ...old name.
+
+	* poly1305-aes.c (poly1305_aes_update): New function.
+	(poly1305_aes_digest): Update for poly1305_digest changes, do
+	final padding here.
+
+	* poly1305.c (poly1305_update): Deleted file and function. Moved
+	to poly1305-aes.c.
+	* Makefile.in (nettle_SOURCES): Deleted poly1305.c.
+
 2014-01-17  Niels Möller  <nisse@lysator.liu.se>
 
 	* poly1305-internal.c (poly1305_block): Additional argument with
diff --git a/Makefile.in b/Makefile.in
index d6cd848c..c77326b2 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -103,7 +103,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \
 		 serpent-set-key.c serpent-encrypt.c serpent-decrypt.c \
 		 serpent-meta.c \
 		 twofish.c twofish-meta.c \
-		 poly1305-aes.c poly1305.c poly1305-internal.c \
+		 poly1305-aes.c poly1305-internal.c \
 		 umac-nh.c umac-nh-n.c umac-l2.c umac-l3.c \
 		 umac-poly64.c umac-poly128.c umac-set-key.c \
 		 umac32.c umac64.c umac96.c umac128.c \
diff --git a/asm.m4 b/asm.m4
index a6ea52cc..55da2bfb 100644
--- a/asm.m4
+++ b/asm.m4
@@ -85,7 +85,5 @@ STRUCTURE(P1305)
   STRUCT(H2, 4)
   STRUCT(H0, 8)
   STRUCT(H1, 8)
-  STRUCT(BLOCK, 16)
-  STRUCT(INDEX, 4)
 
 divert
diff --git a/poly1305-aes.c b/poly1305-aes.c
index 8a7d9d13..e4a6f748 100644
--- a/poly1305-aes.c
+++ b/poly1305-aes.c
@@ -23,6 +23,7 @@
 #include "config.h"
 #endif
 
+#include <assert.h>
 #include <string.h>
 
 #include "poly1305.h"
@@ -33,7 +34,7 @@ poly1305_aes_set_key (struct poly1305_aes_ctx *ctx, const uint8_t * key)
 {
   aes128_set_encrypt_key(&ctx->aes, (key));
   poly1305_set_key(&ctx->pctx, (key+16));
-  ctx->pctx.index = 0;
+  ctx->index = 0;
 }
 
 void
@@ -43,13 +44,35 @@ poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx,
   memcpy (ctx->nonce, nonce, POLY1305_AES_NONCE_SIZE);
 }
 
+#define COMPRESS(ctx, data) _poly1305_block(&(ctx)->pctx, (data), 1)
+
+void
+poly1305_aes_update (struct poly1305_aes_ctx *ctx, size_t length, const uint8_t *data)
+{
+  MD_UPDATE (ctx, length, data, COMPRESS, (void) 0);
+}
+
 void
 poly1305_aes_digest (struct poly1305_aes_ctx *ctx,
-		     size_t length, uint8_t * digest)
+		     size_t length, uint8_t *digest)
 {
   uint8_t s[POLY1305_BLOCK_SIZE];
+  /* final bytes */
+  if (ctx->index > 0)
+    {
+      assert (ctx->index < POLY1305_BLOCK_SIZE);
+
+      ctx->block[ctx->index] = 1;
+      memset (ctx->block + ctx->index + 1,
+	      0, POLY1305_BLOCK_SIZE - 1 - ctx->index);
+
+      _poly1305_block (&ctx->pctx, ctx->block, 0);
+    }
   aes128_encrypt(&ctx->aes, POLY1305_BLOCK_SIZE, s, ctx->nonce);
-  poly1305_digest (&ctx->pctx, length, digest, s);
+  
+  poly1305_digest (&ctx->pctx, s);
+  memcpy (digest, s, length);
+
   INCREMENT (16, ctx->nonce);
-  ctx->pctx.index = 0;
+  ctx->index = 0;
 }
diff --git a/poly1305-internal.c b/poly1305-internal.c
index 62c6976c..b33a3c9d 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -86,7 +86,7 @@ poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
 }
 
 void
-poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4)
+_poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4)
 {
   uint32_t t0,t1,t2,t3;
   uint32_t b;
@@ -119,28 +119,13 @@ poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned t4)
   ctx->h0 += b * 5;
 }
 
+/* Adds the digest to s (the encrypted nonce) in place, then zeroes h0-h4. */
 void
-poly1305_digest (struct poly1305_ctx *ctx,
- 		 size_t length, uint8_t *digest,
-		 const uint8_t *s)
+poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s)
 {
   uint32_t b, nb;
   uint64_t f0,f1,f2,f3;
   uint32_t g0,g1,g2,g3,g4;
-  uint8_t td[16];
-
-  /* final bytes */
-  /* poly1305_donna_atmost15bytes: */
-  if (ctx->index > 0)
-    {
-      assert (ctx->index < POLY1305_BLOCK_SIZE);
-
-      ctx->block[ctx->index] = 1;
-      memset (ctx->block + ctx->index + 1,
-	      0, POLY1305_BLOCK_SIZE - 1 - ctx->index);
-
-      poly1305_block (ctx, ctx->block, 0);
-    }
 
   b = ctx->h0 >> 26; ctx->h0 = ctx->h0 & 0x3ffffff;
   ctx->h1 +=     b; b = ctx->h1 >> 26; ctx->h1 = ctx->h1 & 0x3ffffff;
@@ -169,13 +154,17 @@ poly1305_digest (struct poly1305_ctx *ctx,
   f2 = ((ctx->h2 >> 12) | (ctx->h3 << 14)) + (uint64_t)LE_READ_UINT32(s+8);
   f3 = ((ctx->h3 >> 18) | (ctx->h4 <<  8)) + (uint64_t)LE_READ_UINT32(s+12);
 
-  LE_WRITE_UINT32(td, f0);
+  LE_WRITE_UINT32(s, f0);
   f1 += (f0 >> 32);
-  LE_WRITE_UINT32(&td[4], f1);
+  LE_WRITE_UINT32(s+4, f1);
   f2 += (f1 >> 32);
-  LE_WRITE_UINT32(&td[8], f2);
+  LE_WRITE_UINT32(s+8, f2);
   f3 += (f2 >> 32);
-  LE_WRITE_UINT32(&td[12], f3);
+  LE_WRITE_UINT32(s+12, f3);
 
-  memcpy(digest, td, length);
+  ctx->h0 = 0;
+  ctx->h1 = 0;
+  ctx->h2 = 0;
+  ctx->h3 = 0;
+  ctx->h4 = 0;
 }
diff --git a/poly1305.c b/poly1305.c
deleted file mode 100644
index 20f669f5..00000000
--- a/poly1305.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* nettle, low-level cryptographics library
- *
- * Copyright (C) 2013 Nikos Mavrogiannopoulos, Niels Möller
- *
- * The nettle library is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at your
- * option) any later version.
- *
- * The nettle library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- * License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with the nettle library; see the file COPYING.LIB.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02111-1301, USA.
- */
-
-#if HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <string.h>
-
-#include "poly1305.h"
-
-#include "macros.h"
-
-#define COMPRESS(ctx, data) poly1305_block((ctx), (data), 1)
-
-void
-poly1305_update (struct poly1305_ctx *ctx, size_t length, const uint8_t *data)
-{
-  MD_UPDATE (ctx, length, data, COMPRESS, (void) 0);
-}
diff --git a/poly1305.h b/poly1305.h
index 3517d9c3..359c8e32 100644
--- a/poly1305.h
+++ b/poly1305.h
@@ -35,12 +35,12 @@ extern "C" {
 
 /* Name mangling */
 #define poly1305_set_key nettle_poly1305_set_key
-#define poly1305_update nettle_poly1305_update
-#define poly1305_block nettle_poly1305_block
 #define poly1305_digest nettle_poly1305_digest
+#define _poly1305_block _nettle_poly1305_block
 
 #define poly1305_aes_set_key nettle_poly1305_aes_set_key
 #define poly1305_aes_set_nonce nettle_poly1305_aes_set_nonce
+#define poly1305_aes_update nettle_poly1305_aes_update
 #define poly1305_aes_digest nettle_poly1305_aes_digest
 
 /* Low level functions/macros for the poly1305 construction. */
@@ -66,17 +66,15 @@ struct poly1305_ctx {
     uint32_t h32[4];
     uint64_t h64[2];
   } h;
-
-  uint8_t block[POLY1305_BLOCK_SIZE];
-  unsigned index;
 };
 
+/* Low-level internal interface. */
 void poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[POLY1305_KEY_SIZE]);
-void poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[POLY1305_BLOCK_SIZE],
-		     unsigned high);
-void poly1305_update (struct poly1305_ctx *ctx, size_t size, const uint8_t *data);
-void poly1305_digest (struct poly1305_ctx *ctx,
-		      size_t length, uint8_t *digest, const uint8_t *s);
+/* Extracts digest, and adds it to s, the encrypted nonce. */
+void poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s);
+/* Internal function. Process one block. */
+void _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[POLY1305_BLOCK_SIZE],
+		      unsigned high);
 
 /* poly1305-aes */
 
@@ -86,8 +84,11 @@ void poly1305_digest (struct poly1305_ctx *ctx,
 
 struct poly1305_aes_ctx
 {
-  /* Must be first element, for the poly1305_aes_update cast to work. */
+  /* Keep aes context last, to make it possible to use a general
+     poly1305_update if other variants are added. */
   struct poly1305_ctx pctx;
+  uint8_t block[POLY1305_BLOCK_SIZE];
+  unsigned index;
   uint8_t nonce[POLY1305_BLOCK_SIZE];
   struct aes128_ctx aes;
 };
@@ -101,9 +102,10 @@ void
 poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx,
 		        const uint8_t *nonce);
 
-/* An alias, nothing aes-specific. */
-#define poly1305_aes_update \
-  (*(void(*)(struct poly1305_aes_ctx *, size_t, const uint8_t *))&poly1305_update)
+/* Update is not aes-specific, but since this is the only implemented
+   variant, we need no more general poly1305_update. */
+void
+poly1305_aes_update (struct poly1305_aes_ctx *ctx, size_t length, const uint8_t *data);
 
 /* Also increments the nonce */
 void
diff --git a/x86_64/poly1305-internal.asm b/x86_64/poly1305-internal.asm
index 9b8ae013..453c62b2 100644
--- a/x86_64/poly1305-internal.asm
+++ b/x86_64/poly1305-internal.asm
@@ -52,7 +52,6 @@ PROLOGUE(nettle_poly1305_set_key)
 	mov	%rax, P1305_H0 (CTX)
 	mov	%rax, P1305_H1 (CTX)
 	mov	XREG(%rax), P1305_H2 (CTX)
-	mov	XREG(%rax), P1305_INDEX (CTX)
 	
 	W64_EXIT(2,0)
 	ret
@@ -61,7 +60,7 @@ EPILOGUE(nettle_poly1305_set_key)
 
 C 64-bit multiplication mod 2^130 - 5
 C
-C (x_0 + B x_1 + B^2 x_1) * (r_0 + B r_1) =
+C (x_0 + B x_1 + B^2 x_2) * (r_0 + B r_1) =
 C     1   B B^2 B^3 
 C   x_0 r_0
 C       x_0 r_1
@@ -73,40 +72,47 @@ C Then r_1 B^2 = r_1/4 (2^130) = 5/4 r_1.
 C and  r_1 B^3 = 5/4 B r_1
 C So we get
 C
-C  x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0)	
+C  x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0)
+C     1   B B^2 B^3 
+C   x_0 r_0
+C   x_1 r'_1
+C       x_0 r_1
+C       x_1 r_0
+C       x_2 r'_1
+C           x_2 r_0
 
-	C poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi)
+	C _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi)
 	
-PROLOGUE(nettle_poly1305_block)
+PROLOGUE(_nettle_poly1305_block)
+	W64_ENTRY(3, 0)
 	mov	(%rsi), T0
 	mov	8(%rsi), T1
 	mov	XREG(%rdx),	XREG(T2)
-C FIXME: Support windows ABI 
+
 	C Registers:
 	C Inputs:  CTX, T0, T1, T2,
 	C Outputs: H0, H1, H2, stored into the context.
 
-C_NAME(poly1305_block):
 	add	P1305_H0 (CTX), T0
 	adc	P1305_H1 (CTX), T1
 	adc	P1305_H2 (CTX), XREG(T2)
 	mov	P1305_R0 (CTX), %rax
-	mul	T0
+	mul	T0			C x0*r0
 	mov	%rax, H0
 	mov	%rdx, H1
 	mov	P1305_S1 (CTX), %rax	C 5/4 r1
 	mov	%rax, H2
-	mul	T1
-	imul	T2, H2
-	imul	P1305_R0 (CTX), T2
+	mul	T1			C x1*r1'
+	imul	T2, H2			C x2*r1'
+	imul	P1305_R0 (CTX), T2	C x2*r0
 	add	%rax, H0
 	adc	%rdx, H1
 	mov	P1305_R0 (CTX), %rax
-	mul	T1
+	mul	T1			C x1*r0
 	add	%rax, H2
 	adc	%rdx, T2
 	mov	P1305_R1 (CTX), %rax
-	mul	T0
+	mul	T0			C x0*r1
 	add	%rax, H2
 	adc	%rdx, T2
 	mov	T2, %rax
@@ -119,57 +125,17 @@ C_NAME(poly1305_block):
 	mov	H0, P1305_H0 (CTX)
 	mov	H1, P1305_H1 (CTX)
 	mov	XREG(T2), P1305_H2 (CTX)
+	W64_EXIT(3, 0)
 	ret
-EPILOGUE(nettle_poly1305_block)
+EPILOGUE(_nettle_poly1305_block)
 
-	C poly1305_digest (struct poly1305_ctx *ctx,
- 	C		   size_t length, uint8_t *digest,
-	C		   const uint8_t *s)
+	C poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s)
 	C Registers:
 	C   %rdi: ctx
-	C   %rsi: length
-	C   %rdx: digest
-	C   %rcx: s
+	C   %rsi: s
 	
 PROLOGUE(nettle_poly1305_digest)
-	W64_ENTRY(4, 0)
-	mov	P1305_INDEX (CTX), XREG(%rax)
-	push	%rsi
-	push	%rdx
-	push	%rcx
-	test	XREG(%rax), XREG(%rax)
-	jz	.Lfinal
-
-	C Pad with a 1 byte.
-	C FIXME: Or in, without storing in memory.
-	inc	XREG(%rax)	C Also clears high half
-	movb	$1, P1305_BLOCK-1 (CTX, %rax)
-	
-	mov	XREG(%rax), XREG(%rcx)
-	mov	$1, T1
-	and	$7, XREG(%rcx)	
-	shl	$3, XREG(%rcx)
-	shl	LREG(%rcx), T1
-	dec	T1
-	mov	P1305_BLOCK (CTX), T0
-	xor	T2, T2
-	cmp	$8, XREG(%rax)
-	jc	.Lfinal_lt8
-	C	If %rax == 16, we get T1 == 0,
-	C 	tweak so we get need T1 = -1 instead.
-	cmp	$16, XREG(%rax)
-	adc	$-1, T1
-	and	P1305_BLOCK+8 (CTX), T1
-	jmp	.Lfinal_block
-
-.Lfinal_lt8:
-	and	T1, T0
-	xor	T1, T1
-.Lfinal_block:
-
-	call	poly1305_block
-
-.Lfinal:
+	W64_ENTRY(2, 0)
 
 	mov	P1305_H0 (CTX), H0
 	mov	P1305_H1 (CTX), H1
@@ -182,6 +148,8 @@ PROLOGUE(nettle_poly1305_digest)
 	adc	$0, H1
 	adc	$0, XREG(H2)
 
+C Use %rax instead of %rsi for T1; %rsi holds the s pointer used below.
+define(<T1>, <%rax>)
 	C Add 5, use result if >= 2^130
 	mov	$5, T0
 	xor	T1, T1
@@ -192,38 +160,13 @@ PROLOGUE(nettle_poly1305_digest)
 	cmovnc	T0, H0
 	cmovnc	T1, H1
 
-	pop	%rcx
-	pop	%rdx
-	pop	%rsi
-
-	add	(%rcx), H0
-	adc	8(%rcx), H1
+	add	H0, (%rsi)
+	adc	H1, 8(%rsi)
 
-	C Store, taking length into account
-	cmp	$8, %rsi
-	jc	.Ldigest_lt8
-	mov	H0, (%rdx)
-	jz	.Ldigest_done
-	cmp	$16, %rsi
-	jc	.Ldigest_lt16
-	mov	H1, 8(%rdx)
-	jmp	.Ldigest_done
-.Ldigest_lt16:
-	mov	H1, H0
-	add	$8, %rdx
-	sub	$8, %rsi
-.Ldigest_lt8:
-	movb	LREG(H0), (%rdx)
-	shr	$8, H0
-	inc	%rdx
-	dec	%rsi
-	jnz	.Ldigest_lt8
-.Ldigest_done:
 	xor	XREG(%rax), XREG(%rax)
 	mov	%rax, P1305_H0 (CTX)
 	mov	%rax, P1305_H1 (CTX)
 	mov	XREG(%rax), P1305_H2 (CTX)
-	mov	XREG(%rax), P1305_INDEX (CTX)
-	W64_EXIT(4, 0)
+	W64_EXIT(2, 0)
 	ret
 
-- 
GitLab