From 3594d19e2d0c28715e3295e11ea681c2c1cde010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Tue, 12 Nov 2013 17:23:43 +0100
Subject: [PATCH] New function poly1305_block_internal.

---
 ChangeLog           |  2 ++
 poly1305-internal.c | 77 +++++++++++++++++++--------------------------
 2 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5aeea615..49a29bd6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,8 @@
 
 	* poly1305-internal.c: New file, for poly1305 functions depending
 	on the internal mod (2^130 - 5) representation.
+	(poly1305_block_internal): New helper function.
+	(poly1305_block, poly1305_digest): Use it.
 
 2013-11-08  Nikos Mavrogiannopoulos  <nmav@gnutls.org>
 
diff --git a/poly1305-internal.c b/poly1305-internal.c
index 72806625..fd179027 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -66,26 +66,20 @@ poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
   ctx->h4 = 0;
 }
 
-
-void
-poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16])
+static void
+poly1305_block_internal (struct poly1305_ctx *ctx,
+			 uint32_t t0, uint32_t t1, uint32_t t2, uint32_t t3,
+			 uint32_t t4)
 {
-  uint32_t t0,t1,t2,t3;
   uint32_t b;
   uint64_t t[5];
   uint64_t c;
 
-  /* full blocks */
-  t0 = LE_READ_UINT32(m);
-  t1 = LE_READ_UINT32(m+4);
-  t2 = LE_READ_UINT32(m+8);
-  t3 = LE_READ_UINT32(m+12);
-
   ctx->h0 += t0 & 0x3ffffff;
   ctx->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
   ctx->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
   ctx->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
-  ctx->h4 += (t3 >> 8) | (1 << 24);
+  ctx->h4 += (t3 >> 8) | (t4 << 24);
 
   /* poly1305_donna_mul: */
   t[0]  = mul32x32_64(ctx->h0,ctx->r0) + mul32x32_64(ctx->h1,ctx->s4) + mul32x32_64(ctx->h2,ctx->s3) + mul32x32_64(ctx->h3,ctx->s2) + mul32x32_64(ctx->h4,ctx->s1);
@@ -102,54 +96,49 @@ poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16])
   ctx->h0 += b * 5;
 }
 
+void
+poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16])
+{
+  uint32_t t0,t1,t2,t3;
+
+  /* full blocks */
+  t0 = LE_READ_UINT32(m);
+  t1 = LE_READ_UINT32(m+4);
+  t2 = LE_READ_UINT32(m+8);
+  t3 = LE_READ_UINT32(m+12);
+
+  poly1305_block_internal (ctx, t0, t1, t2, t3, 1);
+}
+
 void
 poly1305_digest (struct poly1305_ctx *ctx,
  		 size_t length, uint8_t *digest)
 {
-  uint32_t t0,t1,t2,t3;
   uint32_t b, nb;
-  size_t j;
-  uint64_t t[5];
   uint64_t f0,f1,f2,f3;
   uint32_t g0,g1,g2,g3,g4;
-  uint64_t c;
-  uint8_t mp[16];
   uint8_t td[16];
 
   /* final bytes */
   /* poly1305_donna_atmost15bytes: */
-  if (!ctx->index) goto poly1305_donna_finish;
-
-  for (j = 0; j < ctx->index; j++) mp[j] = ctx->block[j];
-  mp[j++] = 1;
-  for (; j < 16; j++)	mp[j] = 0;
+  if (ctx->index > 0)
+    {
+      uint32_t t0,t1,t2,t3;
+      size_t j;
+      uint8_t mp[16];
 
-  t0 = LE_READ_UINT32(mp);
-  t1 = LE_READ_UINT32(mp+4);
-  t2 = LE_READ_UINT32(mp+8);
-  t3 = LE_READ_UINT32(mp+12);
+      for (j = 0; j < ctx->index; j++) mp[j] = ctx->block[j];
+      mp[j++] = 1;
+      for (; j < 16; j++)	mp[j] = 0;
 
-  ctx->h0 += t0 & 0x3ffffff;
-  ctx->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
-  ctx->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
-  ctx->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
-  ctx->h4 += (t3 >> 8);
-
-  /* poly1305_donna_mul: */
-  t[0]  = mul32x32_64(ctx->h0,ctx->r0) + mul32x32_64(ctx->h1,ctx->s4) + mul32x32_64(ctx->h2,ctx->s3) + mul32x32_64(ctx->h3,ctx->s2) + mul32x32_64(ctx->h4,ctx->s1);
-  t[1]  = mul32x32_64(ctx->h0,ctx->r1) + mul32x32_64(ctx->h1,ctx->r0) + mul32x32_64(ctx->h2,ctx->s4) + mul32x32_64(ctx->h3,ctx->s3) + mul32x32_64(ctx->h4,ctx->s2);
-  t[2]  = mul32x32_64(ctx->h0,ctx->r2) + mul32x32_64(ctx->h1,ctx->r1) + mul32x32_64(ctx->h2,ctx->r0) + mul32x32_64(ctx->h3,ctx->s4) + mul32x32_64(ctx->h4,ctx->s3);
-  t[3]  = mul32x32_64(ctx->h0,ctx->r3) + mul32x32_64(ctx->h1,ctx->r2) + mul32x32_64(ctx->h2,ctx->r1) + mul32x32_64(ctx->h3,ctx->r0) + mul32x32_64(ctx->h4,ctx->s4);
-  t[4]  = mul32x32_64(ctx->h0,ctx->r4) + mul32x32_64(ctx->h1,ctx->r3) + mul32x32_64(ctx->h2,ctx->r2) + mul32x32_64(ctx->h3,ctx->r1) + mul32x32_64(ctx->h4,ctx->r0);
+      t0 = LE_READ_UINT32(mp);
+      t1 = LE_READ_UINT32(mp+4);
+      t2 = LE_READ_UINT32(mp+8);
+      t3 = LE_READ_UINT32(mp+12);
 
-  ctx->h0 = (uint32_t)t[0] & 0x3ffffff; c =           (t[0] >> 26);
-  t[1] += c;      ctx->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26);
-  t[2] += b;      ctx->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26);
-  t[3] += b;      ctx->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26);
-  t[4] += b;      ctx->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26);
-  ctx->h0 += b * 5;
+      poly1305_block_internal (ctx, t0, t1, t2, t3, 0);
+    }
 
- poly1305_donna_finish:
   b = ctx->h0 >> 26; ctx->h0 = ctx->h0 & 0x3ffffff;
   ctx->h1 +=     b; b = ctx->h1 >> 26; ctx->h1 = ctx->h1 & 0x3ffffff;
   ctx->h2 +=     b; b = ctx->h2 >> 26; ctx->h2 = ctx->h2 & 0x3ffffff;
-- 
GitLab