diff --git a/ChangeLog b/ChangeLog
index 740ec54a576f2eac150fabd0119574b77754474d..ca7d0c1fce916769c9c06d0aea82bd0006f80925 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2013-09-28  Niels Möller  <nisse@lysator.liu.se>
 
+	* md4.h (struct md4_ctx): Use single uint64_t variable for block
+	count.
+	* md4.c: Use new block count variable.
+	* md5.c, md5.h (struct md5_ctx): Likewise.
+	* ripemd160.c, ripemd160.h (struct ripemd160_ctx): Likewise.
+	* sha1.c, sha1.h (struct sha1_ctx): Likewise.
+	* sha256.c, sha2.h (struct sha256_ctx): Likewise.
+
 	* testsuite/testutils.c (test_hash_large): Added simple progress
 	indicator.
 
diff --git a/macros.h b/macros.h
index 091b4f046ebd585622eb1a09a598daba5cd9898d..6bbbb634b06f0e3a37650f705dcd2efeb6a5b905 100644
--- a/macros.h
+++ b/macros.h
@@ -162,14 +162,11 @@ do {						\
 /* Helper macro for Merkle-Damgård hash functions. Assumes the context
    structs includes the following fields:
 
-     xxx count_low, count_high;		// Two word block count
      uint8_t block[...];		// Buffer holding one block
      unsigned int index;		// Index into block
 */
 
-/* FIXME: Should probably switch to using uint64_t for the count, but
-   due to alignment and byte order that may be an ABI change. */
-
+/* Needs count_low, count_high. Currently used by sha512 (and sha384) only. */
 #define MD_INCR(ctx) ((ctx)->count_high += !++(ctx)->count_low)
 
 /* Takes the compression function f as argument. NOTE: also clobbers
diff --git a/md4.c b/md4.c
index 80d50f6b40258a0d58e91217c3cf5e4af7ca7e20..05321ee2bfcb3d39b814d5305be2cde70961c897 100644
--- a/md4.c
+++ b/md4.c
@@ -61,7 +61,7 @@ md4_init(struct md4_ctx *ctx)
     };
   memcpy(ctx->state, iv, sizeof(ctx->state));
   
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   ctx->index = 0;
 }
 
@@ -70,7 +70,7 @@ md4_update(struct md4_ctx *ctx,
 	   size_t length,
 	   const uint8_t *data)
 {
-  MD_UPDATE(ctx, length, data, md4_compress, MD_INCR(ctx));
+  MD_UPDATE(ctx, length, data, md4_compress, ctx->count++);
 }
 
 void
@@ -78,6 +78,7 @@ md4_digest(struct md4_ctx *ctx,
 	   size_t length,
 	   uint8_t *digest)
 {
+  uint64_t bit_count;
   uint32_t data[MD4_DATA_LENGTH];
   unsigned i;
 
@@ -89,9 +90,9 @@ md4_digest(struct md4_ctx *ctx,
 
   /* There are 512 = 2^9 bits in one block 
    * Little-endian order => Least significant word first */
-
-  data[MD4_DATA_LENGTH-1] = (ctx->count_high << 9) | (ctx->count_low >> 23);
-  data[MD4_DATA_LENGTH-2] = (ctx->count_low << 9) | (ctx->index << 3);
+  bit_count = (ctx->count << 9) | (ctx->index << 3);
+  data[MD4_DATA_LENGTH-2] = bit_count;
+  data[MD4_DATA_LENGTH-1] = bit_count >> 32;
   md4_transform(ctx->state, data);
 
   _nettle_write_le32(length, digest, ctx->state);
diff --git a/md4.h b/md4.h
index 5b805c2207c160bf7c750dca0cda7a46d49fef2e..a415e021e272b9877b853287d3cf4d0151b0394a 100644
--- a/md4.h
+++ b/md4.h
@@ -47,7 +47,7 @@ extern "C" {
 struct md4_ctx
 {
   uint32_t state[_MD4_DIGEST_LENGTH];
-  uint32_t count_low, count_high;	/* Block count */
+  uint64_t count;			/* Block count */
   uint8_t block[MD4_DATA_SIZE];		/* Block buffer */
   unsigned index;			/* Into buffer */
 };
diff --git a/md5.c b/md5.c
index 32c3583e6f2d3a30ff5718baf52e56747d76aaff..370361b83df9139e2b0dcf51ab37d3e8c960f272 100644
--- a/md5.c
+++ b/md5.c
@@ -49,7 +49,7 @@ md5_init(struct md5_ctx *ctx)
       0x10325476,
     };
   memcpy(ctx->state, iv, sizeof(ctx->state));
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   ctx->index = 0;
 }
 
@@ -60,7 +60,7 @@ md5_update(struct md5_ctx *ctx,
 	   size_t length,
 	   const uint8_t *data)
 {
-  MD_UPDATE(ctx, length, data, COMPRESS, MD_INCR(ctx));
+  MD_UPDATE(ctx, length, data, COMPRESS, ctx->count++);
 }
 
 void
@@ -68,18 +68,16 @@ md5_digest(struct md5_ctx *ctx,
 	   size_t length,
 	   uint8_t *digest)
 {
-  uint32_t high, low;
+  uint64_t bit_count;
   
   assert(length <= MD5_DIGEST_SIZE);
 
   MD_PAD(ctx, 8, COMPRESS);
 
-  /* There are 512 = 2^9 bits in one block */  
-  high = (ctx->count_high << 9) | (ctx->count_low >> 23);
-  low = (ctx->count_low << 9) | (ctx->index << 3);
+  /* There are 512 = 2^9 bits in one block */
+  bit_count = (ctx->count << 9) | (ctx->index << 3);
 
-  LE_WRITE_UINT32(ctx->block + (MD5_DATA_SIZE - 8), low);
-  LE_WRITE_UINT32(ctx->block + (MD5_DATA_SIZE - 4), high);
+  LE_WRITE_UINT64(ctx->block + (MD5_DATA_SIZE - 8), bit_count);
   _nettle_md5_compress(ctx->state, ctx->block);
 
   _nettle_write_le32(length, digest, ctx->state);
diff --git a/md5.h b/md5.h
index 2899cdfce6dbfcbf31b6906dfa6bc7ac7d044719..409329001bcbb91c46e85e05d3fdfee54be37afc 100644
--- a/md5.h
+++ b/md5.h
@@ -46,7 +46,7 @@ extern "C" {
 struct md5_ctx
 {
   uint32_t state[_MD5_DIGEST_LENGTH];
-  uint32_t count_low, count_high; /* Block count */
+  uint64_t count;               /* Block count */
   uint8_t block[MD5_DATA_SIZE]; /* Block buffer */
   unsigned index;               /* Into buffer */
 };
diff --git a/ripemd160.c b/ripemd160.c
index 92a6414f1d55436d5b93866fb97aba26fda27b2f..2f9735ca3de2ba20daf1f60d2447da22b644cfba 100644
--- a/ripemd160.c
+++ b/ripemd160.c
@@ -155,7 +155,7 @@ ripemd160_init(struct ripemd160_ctx *ctx)
       0xC3D2E1F0,
     };
   memcpy(ctx->state, iv, sizeof(ctx->state));
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   ctx->index = 0;
 }
 
@@ -167,25 +167,23 @@ ripemd160_init(struct ripemd160_ctx *ctx)
 void
 ripemd160_update(struct ripemd160_ctx *ctx, size_t length, const uint8_t *data)
 {
-  MD_UPDATE(ctx, length, data, COMPRESS, MD_INCR(ctx));
+  MD_UPDATE(ctx, length, data, COMPRESS, ctx->count++);
 }
 
 void
 ripemd160_digest(struct ripemd160_ctx *ctx, size_t length, uint8_t *digest)
 {
-  uint32_t high, low;
+  uint64_t bit_count;
 
   assert(length <= RIPEMD160_DIGEST_SIZE);
 
   MD_PAD(ctx, 8, COMPRESS);
 
   /* There are 2^9 bits in one block */
-  high = (ctx->count_high << 9) | (ctx->count_low >> 23);
-  low = (ctx->count_low << 9) | (ctx->index << 3);
+  bit_count = (ctx->count << 9) | (ctx->index << 3);
 									\
   /* append the 64 bit count */
-  LE_WRITE_UINT32(ctx->block + 56, low);
-  LE_WRITE_UINT32(ctx->block + 60, high);
+  LE_WRITE_UINT64(ctx->block + 56, bit_count);
   _nettle_ripemd160_compress(ctx->state, ctx->block);
 
   _nettle_write_le32(length, digest, ctx->state);
diff --git a/ripemd160.h b/ripemd160.h
index eca987361c9a0bfe597b294d3bbf079e4152f7bf..9ef86d310fc6b4f4205df2557bd20a3a8147374a 100644
--- a/ripemd160.h
+++ b/ripemd160.h
@@ -48,7 +48,7 @@ extern "C" {
 struct ripemd160_ctx
 {
   uint32_t state[_RIPEMD160_DIGEST_LENGTH];
-  uint32_t count_low, count_high;         /* 64-bit block count */
+  uint64_t count;         /* 64-bit block count */
   uint8_t block[RIPEMD160_DATA_SIZE];
   unsigned int index;
 };
diff --git a/sha1.c b/sha1.c
index b7ab94124f942075fb7633c15cd1f830999e21c4..e53913c987f400202d225dbcd12b2ed3e69cc5d2 100644
--- a/sha1.c
+++ b/sha1.c
@@ -66,7 +66,7 @@ sha1_init(struct sha1_ctx *ctx)
     };
 
   memcpy(ctx->state, iv, sizeof(ctx->state));
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   
   /* Initialize buffer */
   ctx->index = 0;
@@ -78,7 +78,7 @@ void
 sha1_update(struct sha1_ctx *ctx,
 	    size_t length, const uint8_t *data)
 {
-  MD_UPDATE (ctx, length, data, COMPRESS, MD_INCR(ctx));
+  MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++);
 }
 	  
 void
@@ -86,19 +86,17 @@ sha1_digest(struct sha1_ctx *ctx,
 	    size_t length,
 	    uint8_t *digest)
 {
-  uint32_t high, low;
+  uint64_t bit_count;
 
   assert(length <= SHA1_DIGEST_SIZE);
 
   MD_PAD(ctx, 8, COMPRESS);
 
-  /* There are 512 = 2^9 bits in one block */  
-  high = (ctx->count_high << 9) | (ctx->count_low >> 23);
-  low = (ctx->count_low << 9) | (ctx->index << 3);
+  /* There are 512 = 2^9 bits in one block */
+  bit_count = (ctx->count << 9) | (ctx->index << 3);
 
   /* append the 64 bit count */
-  WRITE_UINT32(ctx->block + (SHA1_DATA_SIZE - 8), high);
-  WRITE_UINT32(ctx->block + (SHA1_DATA_SIZE - 4), low);
+  WRITE_UINT64(ctx->block + (SHA1_DATA_SIZE - 8), bit_count);
   _nettle_sha1_compress(ctx->state, ctx->block);
 
   _nettle_write_be32(length, digest, ctx->state);
diff --git a/sha1.h b/sha1.h
index 9452f0aa8ac77b9f0579c8f48f3135a7b34b9a40..da6bfbd0ab4cc5bcd3088590dcab6daf8e1aac70 100644
--- a/sha1.h
+++ b/sha1.h
@@ -48,7 +48,7 @@ extern "C" {
 struct sha1_ctx
 {
   uint32_t state[_SHA1_DIGEST_LENGTH];    /* State variables */
-  uint32_t count_low, count_high;         /* 64-bit block count */
+  uint64_t count;                         /* 64-bit block count */
   uint8_t block[SHA1_DATA_SIZE];          /* SHA1 data buffer */
   unsigned int index;                     /* index into buffer */
 };
diff --git a/sha2.h b/sha2.h
index 3a0b449aac7d70f0b4b5c1032820dfbecd8b4c07..f095dad10135e86286bde6cf53af0c652c74621c 100644
--- a/sha2.h
+++ b/sha2.h
@@ -55,7 +55,7 @@ extern "C" {
 struct sha256_ctx
 {
   uint32_t state[_SHA256_DIGEST_LENGTH];    /* State variables */
-  uint32_t count_low, count_high;           /* 64-bit block count */
+  uint64_t count;                           /* 64-bit block count */
   uint8_t block[SHA256_DATA_SIZE];          /* SHA256 data buffer */
   unsigned int index;                       /* index into buffer */
 };
diff --git a/sha256.c b/sha256.c
index 276f1351b14ff1a5da72a9c7333f275c47ccbf5c..46cccffec7a2d062393f3bc8781c262da58a649c 100644
--- a/sha256.c
+++ b/sha256.c
@@ -79,7 +79,7 @@ sha256_init(struct sha256_ctx *ctx)
   memcpy(ctx->state, H0, sizeof(H0));
 
   /* Initialize bit count */
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   
   /* Initialize buffer */
   ctx->index = 0;
@@ -89,7 +89,7 @@ void
 sha256_update(struct sha256_ctx *ctx,
 	      size_t length, const uint8_t *data)
 {
-  MD_UPDATE (ctx, length, data, COMPRESS, MD_INCR(ctx));
+  MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++);
 }
 
 static void
@@ -97,21 +97,19 @@ sha256_write_digest(struct sha256_ctx *ctx,
 		    size_t length,
 		    uint8_t *digest)
 {
-  uint32_t high, low;
+  uint64_t bit_count;
 
   assert(length <= SHA256_DIGEST_SIZE);
 
   MD_PAD(ctx, 8, COMPRESS);
 
   /* There are 512 = 2^9 bits in one block */  
-  high = (ctx->count_high << 9) | (ctx->count_low >> 23);
-  low = (ctx->count_low << 9) | (ctx->index << 3);
+  bit_count = (ctx->count << 9) | (ctx->index << 3);
 
   /* This is slightly inefficient, as the numbers are converted to
      big-endian format, and will be converted back by the compression
      function. It's probably not worth the effort to fix this. */
-  WRITE_UINT32(ctx->block + (SHA256_DATA_SIZE - 8), high);
-  WRITE_UINT32(ctx->block + (SHA256_DATA_SIZE - 4), low);
+  WRITE_UINT64(ctx->block + (SHA256_DATA_SIZE - 8), bit_count);
   COMPRESS(ctx, ctx->block);
 
   _nettle_write_be32(length, digest, ctx->state);
@@ -141,7 +139,7 @@ sha224_init(struct sha256_ctx *ctx)
   memcpy(ctx->state, H0, sizeof(H0));
 
   /* Initialize bit count */
-  ctx->count_low = ctx->count_high = 0;
+  ctx->count = 0;
   
   /* Initialize buffer */
   ctx->index = 0;