diff --git a/ChangeLog b/ChangeLog
index 2685b5f99932025a37366cb6dfb0e0a4a9d5ce07..a445477916f9566e6c61f17b33cfc2d6bcd27964 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,12 @@
 2010-03-24  Niels Möller  <nisse@lysator.liu.se>
 
-	* Makefile.in (nettle_SOURCES): Added sha256-compress.c.
+	* sha512.c: Reorganized to use _nettle_sha512_compress.
+
+	* sha512-compress.c (_nettle_sha512_compress): Compression
+	function extracted from sha512.c to a new file.
+
+	* Makefile.in (nettle_SOURCES): Added sha256-compress.c and
+	sha512-compress.c.
 
 	* sha256.c: Reorganized to use _nettle_sha256_compress.
 
diff --git a/Makefile.in b/Makefile.in
index f2d82d950a93af26b71f5af1be59bf6781be1387..be5416ebc111ec987781fae3234af61bc519af08 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -65,7 +65,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \
 		 md2.c md2-meta.c md4.c md4-meta.c \
 		 md5.c md5-compress.c md5-compat.c md5-meta.c \
 		 sha1.c sha1-compress.c sha1-meta.c sha256.c sha256-compress.c sha256-meta.c \
-		 sha512.c sha512-meta.c \
+		 sha512.c sha512-compress.c sha512-meta.c \
 		 serpent.c serpent-meta.c \
 		 twofish.c twofish-meta.c \
 		 yarrow256.c yarrow_key_event.c \
diff --git a/sha.h b/sha.h
index c95dffdaa29c2f4dad455faca102381cdccec0be..cf4743e6fd7f5a892a9731792c88b3b53d14a956 100644
--- a/sha.h
+++ b/sha.h
@@ -141,6 +141,12 @@ sha512_digest(struct sha512_ctx *ctx,
 	      unsigned length,
 	      uint8_t *digest);
 
+/* Internal compression function. STATE points to 8 uint64_t words,
+   DATA points to 128 bytes of input data, possibly unaligned, and K
+   points to the table of constants. */
+void
+_nettle_sha512_compress(uint64_t *state, const uint8_t *data, const uint64_t *k);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/sha512-compress.c b/sha512-compress.c
new file mode 100644
index 0000000000000000000000000000000000000000..f11226e368cc794f7aeec94f5baeacf5d46c0544
--- /dev/null
+++ b/sha512-compress.c
@@ -0,0 +1,164 @@
+/* sha512-compress.c
+ *
+ * The compression function of the sha512 hash function.
+ */
+
+/* nettle, low-level cryptographics library
+ *
+ * Copyright (C) 2001, 2010 Niels Möller
+ *  
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ * 
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sha.h"
+
+#include "macros.h"
+
+/* A block, treated as a sequence of 64-bit words. */
+#define SHA512_DATA_LENGTH 16
+
+#define ROTR(n,x) ((x)>>(n) | ((x)<<(64-(n))))
+#define SHR(n,x) ((x)>>(n))
+
+/* The SHA512 functions. The Choice function is the same as the SHA1
+   function f1, and the majority function is the same as the SHA1 f3
+   function, and the same as for SHA256. */
+
+#define Choice(x,y,z)   ( (z) ^ ( (x) & ( (y) ^ (z) ) ) ) 
+#define Majority(x,y,z) ( ((x) & (y)) ^ ((z) & ((x) ^ (y))) )
+
+#define S0(x) (ROTR(28,(x)) ^ ROTR(34,(x)) ^ ROTR(39,(x))) 
+#define S1(x) (ROTR(14,(x)) ^ ROTR(18,(x)) ^ ROTR(41,(x)))
+
+#define s0(x) (ROTR(1,(x)) ^ ROTR(8,(x)) ^ SHR(7,(x)))
+#define s1(x) (ROTR(19,(x)) ^ ROTR(61,(x)) ^ SHR(6,(x)))
+
+/* The initial expanding function. The hash function is defined over
+   an 80-word expanded input array W, where the first 16 are copies of
+   the input data, and the remaining 64 are defined by
+
+        W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
+
+   This implementation generates these values on the fly in a circular
+   buffer.
+*/
+
+#define EXPAND(W,i) \
+( W[(i) & 15 ] += (s1(W[((i)-2) & 15]) + W[((i)-7) & 15] + s0(W[((i)-15) & 15])) )
+
+/* The prototype SHA sub-round.  The fundamental sub-round is:
+
+        T1 = h + S1(e) + Choice(e,f,g) + K[t] + W[t]
+	T2 = S0(a) + Majority(a,b,c)
+	a' = T1+T2
+	b' = a
+	c' = b
+	d' = c
+	e' = d + T1
+	f' = e
+	g' = f
+	h' = g
+
+   but this is implemented by unrolling the loop 8 times and renaming
+   the variables
+   ( h, a, b, c, d, e, f, g ) = ( a, b, c, d, e, f, g, h ) each
+   iteration. This group of 8 sub-rounds is then executed 10 times (80
+   rounds in total), using the next 8 values from the W[] array each time */
+
+/* It's crucial that DATA is only used once, as that argument will
+ * have side effects. */
+#define ROUND(a,b,c,d,e,f,g,h,k,data) do {		\
+  uint64_t T = h + S1(e) + Choice(e,f,g) + k + data;	\
+  d += T;						\
+  h = T + S0(a) + Majority(a,b,c);			\
+} while (0)
+
+void
+_nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
+{
+  uint64_t data[SHA512_DATA_LENGTH];
+  uint64_t A, B, C, D, E, F, G, H;     /* Local vars */
+  unsigned i;
+  uint64_t *d;
+
+  for (i = 0; i < SHA512_DATA_LENGTH; i++, input += 8)
+    {
+      data[i] = READ_UINT64(input);
+    }
+
+  /* Load the current hash state into the working variables */
+  A = state[0];
+  B = state[1];
+  C = state[2];
+  D = state[3];
+  E = state[4];
+  F = state[5];
+  G = state[6];
+  H = state[7];
+  
+  /* Heavy mangling */
+  /* First 16 subrounds that act on the original data */
+
+  for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8)
+    {
+      ROUND(A, B, C, D, E, F, G, H, k[0], d[0]);
+      ROUND(H, A, B, C, D, E, F, G, k[1], d[1]);
+      ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
+      ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
+      ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
+      ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
+      ROUND(C, D, E, F, G, H, A, B, k[6], d[6]);
+      ROUND(B, C, D, E, F, G, H, A, k[7], d[7]);
+    }
+  
+  for (; i<80; i += 16, k+= 16)
+    {
+      ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data,  0));
+      ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data,  1));
+      ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data,  2));
+      ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data,  3));
+      ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data,  4));
+      ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data,  5));
+      ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data,  6));
+      ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data,  7));
+      ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data,  8));
+      ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data,  9));
+      ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10));
+      ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11));
+      ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12));
+      ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13));
+      ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14));
+      ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15));
+    }
+
+  /* Update state */
+  state[0] += A;
+  state[1] += B;
+  state[2] += C;
+  state[3] += D;
+  state[4] += E;
+  state[5] += F;
+  state[6] += G;
+  state[7] += H;
+}
diff --git a/sha512.c b/sha512.c
index aba7317eee91be7a74f215926902c1a902048d66..0a7b6585afd6eb255f6719a5abdf6963081c404f 100644
--- a/sha512.c
+++ b/sha512.c
@@ -39,25 +39,6 @@
 
 #include "macros.h"
 
-/* A block, treated as a sequence of 64-bit words. */
-#define SHA512_DATA_LENGTH 16
-
-#define ROTR(n,x) ((x)>>(n) | ((x)<<(64-(n))))
-#define SHR(n,x) ((x)>>(n))
-
-/* The SHA512 functions. The Choice function is the same as the SHA1
-   function f1, and the majority function is the same as the SHA1 f3
-   function, and the same as for SHA256. */
-
-#define Choice(x,y,z)   ( (z) ^ ( (x) & ( (y) ^ (z) ) ) ) 
-#define Majority(x,y,z) ( ((x) & (y)) ^ ((z) & ((x) ^ (y))) )
-
-#define S0(x) (ROTR(28,(x)) ^ ROTR(34,(x)) ^ ROTR(39,(x))) 
-#define S1(x) (ROTR(14,(x)) ^ ROTR(18,(x)) ^ ROTR(41,(x)))
-
-#define s0(x) (ROTR(1,(x)) ^ ROTR(8,(x)) ^ SHR(7,(x)))
-#define s1(x) (ROTR(19,(x)) ^ ROTR(61,(x)) ^ SHR(6,(x)))
-
 /* Generated by the gp script
 
      {
@@ -123,46 +104,6 @@ K[80] =
   0x5FCB6FAB3AD6FAECULL,0x6C44198C4A475817ULL,
 };
 
-/* The initial expanding function. The hash function is defined over
-   an 64-word expanded input array W, where the first 16 are copies of
-   the input data, and the remaining 64 are defined by
-
-        W[ t ] = s1(W[t-2]) + W[t-7] + s0(W[i-15]) + W[i-16]
-
-   This implementation generates these values on the fly in a circular
-   buffer.
-*/
-
-#define EXPAND(W,i) \
-( W[(i) & 15 ] += (s1(W[((i)-2) & 15]) + W[((i)-7) & 15] + s0(W[((i)-15) & 15])) )
-
-/* The prototype SHA sub-round.  The fundamental sub-round is:
-
-        T1 = h + S1(e) + Choice(e,f,g) + K[t] + W[t]
-	T2 = S0(a) + Majority(a,b,c)
-	a' = T1+T2
-	b' = a
-	c' = b
-	d' = c
-	e' = d + T1
-	f' = e
-	g' = f
-	h' = g
-
-   but this is implemented by unrolling the loop 8 times and renaming
-   the variables
-   ( h, a, b, c, d, e, f, g ) = ( a, b, c, d, e, f, g, h ) each
-   iteration. This code is then replicated 8, using the next 8 values
-   from the W[] array each time */
-
-/* It's crucial that DATA is only used once, as that argument will
- * have side effects. */
-#define ROUND(a,b,c,d,e,f,g,h,k,data) do {		\
-  uint64_t T = h + S1(e) + Choice(e,f,g) + k + data;	\
-  d += T;						\
-  h = T + S0(a) + Majority(a,b,c);			\
-} while (0)
-
 void
 sha512_init(struct sha512_ctx *ctx)
 {
@@ -192,90 +133,7 @@ sha512_init(struct sha512_ctx *ctx)
   ctx->index = 0;
 }
 
-/* Perform the SHA transformation. Note that this function destroys
-   the data area */
-
-static void
-sha512_transform(uint64_t *state, uint64_t *data)
-{
-  /* FIXME: XXX Just copied from sha256. */
-  uint64_t A, B, C, D, E, F, G, H;     /* Local vars */
-  unsigned i;
-  const uint64_t *k;
-  uint64_t *d;
-  
-  /* Set up first buffer and local data buffer */
-  A = state[0];
-  B = state[1];
-  C = state[2];
-  D = state[3];
-  E = state[4];
-  F = state[5];
-  G = state[6];
-  H = state[7];
-  
-  /* Heavy mangling */
-  /* First 16 subrounds that act on the original data */
-
-  for (i = 0, k = K, d = data; i<16; i+=8, k += 8, d+= 8)
-    {
-      ROUND(A, B, C, D, E, F, G, H, k[0], d[0]);
-      ROUND(H, A, B, C, D, E, F, G, k[1], d[1]);
-      ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
-      ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
-      ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
-      ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
-      ROUND(C, D, E, F, G, H, A, B, k[6], d[6]);
-      ROUND(B, C, D, E, F, G, H, A, k[7], d[7]);
-    }
-  
-  for (; i<80; i += 16, k+= 16)
-    {
-      ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data,  0));
-      ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data,  1));
-      ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data,  2));
-      ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data,  3));
-      ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data,  4));
-      ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data,  5));
-      ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data,  6));
-      ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data,  7));
-      ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data,  8));
-      ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data,  9));
-      ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10));
-      ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11));
-      ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12));
-      ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13));
-      ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14));
-      ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15));
-    }
-
-  /* Update state */
-  state[0] += A;
-  state[1] += B;
-  state[2] += C;
-  state[3] += D;
-  state[4] += E;
-  state[5] += F;
-  state[6] += G;
-  state[7] += H;
-}
-
-static void
-sha512_block(struct sha512_ctx *ctx, const uint8_t *block)
-{
-  uint64_t data[SHA512_DATA_LENGTH];
-  int i;
-
-  /* Update block count */
-  if (!++ctx->count_low)
-    ++ctx->count_high;
-
-  /* Endian independent conversion */
-  for (i = 0; i<SHA512_DATA_LENGTH; i++, block += 8)
-    data[i] = READ_UINT64(block);
-
-  sha512_transform(ctx->state, data);
-}
+#define SHA512_INCR(ctx) ((ctx)->count_high += !++(ctx)->count_low)
 
 void
 sha512_update(struct sha512_ctx *ctx,
@@ -293,14 +151,19 @@ sha512_update(struct sha512_ctx *ctx,
       else
 	{
 	  memcpy(ctx->block + ctx->index, buffer, left);
-	  sha512_block(ctx, ctx->block);
+
+	  _nettle_sha512_compress(ctx->state, ctx->block, K);
+	  SHA512_INCR(ctx);
+
 	  buffer += left;
 	  length -= left;
 	}
     }
   while (length >= SHA512_DATA_SIZE)
     {
-      sha512_block(ctx, buffer);
+      _nettle_sha512_compress(ctx->state, buffer, K);
+      SHA512_INCR(ctx);
+
       buffer += SHA512_DATA_SIZE;
       length -= SHA512_DATA_SIZE;
     }
@@ -316,9 +179,9 @@ sha512_update(struct sha512_ctx *ctx,
 static void
 sha512_final(struct sha512_ctx *ctx)
 {
-  uint64_t data[SHA512_DATA_LENGTH];
+  uint64_t bitcount_high;
+  uint64_t bitcount_low;
   int i;
-  int words;
 
   i = ctx->index;
   
@@ -328,32 +191,29 @@ sha512_final(struct sha512_ctx *ctx)
   assert(i < SHA512_DATA_SIZE);
   ctx->block[i++] = 0x80;
 
-  /* Fill rest of word */
-  for( ; i & 7; i++)
-    ctx->block[i] = 0;
-
-  /* i is now a multiple of the word size 8 */
-  words = i >> 3;
-  for (i = 0; i < words; i++)
-    data[i] = READ_UINT64(ctx->block + 8*i);
-  
-  if (words > (SHA512_DATA_LENGTH-2))
+  if (i > (SHA512_DATA_SIZE-16))
     { /* No room for length in this block. Process it and
        * pad with another one */
-      for (i = words ; i < SHA512_DATA_LENGTH; i++)
-	data[i] = 0;
-      sha512_transform(ctx->state, data);
-      for (i = 0; i < (SHA512_DATA_LENGTH-2); i++)
-	data[i] = 0;
+      memset(ctx->block + i, 0, SHA512_DATA_SIZE - i);
+      _nettle_sha512_compress(ctx->state, ctx->block, K);
+
+      i = 0;
     }
-  else
-    for (i = words ; i < SHA512_DATA_LENGTH - 2; i++)
-      data[i] = 0;
+
+  if (i < (SHA512_DATA_SIZE - 16))
+    memset(ctx->block + i, 0, (SHA512_DATA_SIZE - 16) - i);
 
   /* There are 1024 = 2^10 bits in one block */
-  data[SHA512_DATA_LENGTH-2] = (ctx->count_high << 10) | (ctx->count_low >> 54);
-  data[SHA512_DATA_LENGTH-1] = (ctx->count_low << 10) | (ctx->index << 3);
-  sha512_transform(ctx->state, data);
+  bitcount_high = (ctx->count_high << 10) | (ctx->count_low >> 54);
+  bitcount_low = (ctx->count_low << 10) | (ctx->index << 3);
+
+  /* This is slightly inefficient, as the numbers are converted to
+     big-endian format, and will be converted back by the compression
+     function. It's probably not worth the effort to fix this. */
+  WRITE_UINT64(ctx->block + (SHA512_DATA_SIZE - 16), bitcount_high);
+  WRITE_UINT64(ctx->block + (SHA512_DATA_SIZE - 8), bitcount_low);
+
+  _nettle_sha512_compress(ctx->state, ctx->block, K);
 }
 
 void