diff --git a/md5-compress.c b/md5-compress.c
index 9be4c1d2c9d82d864d0dcf21866de4dde3c629ec..ec949e7430f29fc0174fde95b6a4c146734b5a3a 100644
--- a/md5-compress.c
+++ b/md5-compress.c
@@ -32,6 +32,18 @@
 # include "config.h"
 #endif
 
+#ifndef MD5_DEBUG
+# define MD5_DEBUG 0
+#endif
+
+#if MD5_DEBUG /* DEBUG(i) prints "i: a b c d" in hex on stderr */
+# include <stdio.h>
+# define DEBUG(i) /* casts make the arguments match %lx exactly */ \
+  fprintf(stderr, "%2d: %8lx %8lx %8lx %8lx\n", (i), \
+    (unsigned long) a, (unsigned long) b, (unsigned long) c, (unsigned long) d)
+#else
+# define DEBUG(i)
+#endif
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -74,8 +86,9 @@ _nettle_md5_compress(uint32_t *digest, const uint8_t *input)
   c = digest[2];
   d = digest[3];
 
-  ROUND(F1, a, b, c, d, data[ 0] + 0xd76aa478, 7);
-  ROUND(F1, d, a, b, c, data[ 1] + 0xe8c7b756, 12);
+  DEBUG(-1);
+  ROUND(F1, a, b, c, d, data[ 0] + 0xd76aa478, 7); DEBUG(0);
+  ROUND(F1, d, a, b, c, data[ 1] + 0xe8c7b756, 12); DEBUG(1);
   ROUND(F1, c, d, a, b, data[ 2] + 0x242070db, 17);
   ROUND(F1, b, c, d, a, data[ 3] + 0xc1bdceee, 22);
   ROUND(F1, a, b, c, d, data[ 4] + 0xf57c0faf, 7);
@@ -89,10 +102,10 @@ _nettle_md5_compress(uint32_t *digest, const uint8_t *input)
   ROUND(F1, a, b, c, d, data[12] + 0x6b901122, 7);
   ROUND(F1, d, a, b, c, data[13] + 0xfd987193, 12);
   ROUND(F1, c, d, a, b, data[14] + 0xa679438e, 17);
-  ROUND(F1, b, c, d, a, data[15] + 0x49b40821, 22);
+  ROUND(F1, b, c, d, a, data[15] + 0x49b40821, 22); DEBUG(15);
 
-  ROUND(F2, a, b, c, d, data[ 1] + 0xf61e2562, 5);
-  ROUND(F2, d, a, b, c, data[ 6] + 0xc040b340, 9);
+  ROUND(F2, a, b, c, d, data[ 1] + 0xf61e2562, 5); DEBUG(16);
+  ROUND(F2, d, a, b, c, data[ 6] + 0xc040b340, 9); DEBUG(17);
   ROUND(F2, c, d, a, b, data[11] + 0x265e5a51, 14);
   ROUND(F2, b, c, d, a, data[ 0] + 0xe9b6c7aa, 20);
   ROUND(F2, a, b, c, d, data[ 5] + 0xd62f105d, 5);
@@ -106,10 +119,10 @@ _nettle_md5_compress(uint32_t *digest, const uint8_t *input)
   ROUND(F2, a, b, c, d, data[13] + 0xa9e3e905, 5);
   ROUND(F2, d, a, b, c, data[ 2] + 0xfcefa3f8, 9);
   ROUND(F2, c, d, a, b, data[ 7] + 0x676f02d9, 14);
-  ROUND(F2, b, c, d, a, data[12] + 0x8d2a4c8a, 20);
+  ROUND(F2, b, c, d, a, data[12] + 0x8d2a4c8a, 20); DEBUG(31);
 
-  ROUND(F3, a, b, c, d, data[ 5] + 0xfffa3942, 4);
-  ROUND(F3, d, a, b, c, data[ 8] + 0x8771f681, 11);
+  ROUND(F3, a, b, c, d, data[ 5] + 0xfffa3942, 4); DEBUG(32);
+  ROUND(F3, d, a, b, c, data[ 8] + 0x8771f681, 11); DEBUG(33);
   ROUND(F3, c, d, a, b, data[11] + 0x6d9d6122, 16);
   ROUND(F3, b, c, d, a, data[14] + 0xfde5380c, 23);
   ROUND(F3, a, b, c, d, data[ 1] + 0xa4beea44, 4);
@@ -123,10 +136,10 @@ _nettle_md5_compress(uint32_t *digest, const uint8_t *input)
   ROUND(F3, a, b, c, d, data[ 9] + 0xd9d4d039, 4);
   ROUND(F3, d, a, b, c, data[12] + 0xe6db99e5, 11);
   ROUND(F3, c, d, a, b, data[15] + 0x1fa27cf8, 16);
-  ROUND(F3, b, c, d, a, data[ 2] + 0xc4ac5665, 23);
+  ROUND(F3, b, c, d, a, data[ 2] + 0xc4ac5665, 23); DEBUG(47);
 
-  ROUND(F4, a, b, c, d, data[ 0] + 0xf4292244, 6);
-  ROUND(F4, d, a, b, c, data[ 7] + 0x432aff97, 10);
+  ROUND(F4, a, b, c, d, data[ 0] + 0xf4292244, 6); DEBUG(48);
+  ROUND(F4, d, a, b, c, data[ 7] + 0x432aff97, 10); DEBUG(49);
   ROUND(F4, c, d, a, b, data[14] + 0xab9423a7, 15);
   ROUND(F4, b, c, d, a, data[ 5] + 0xfc93a039, 21);
   ROUND(F4, a, b, c, d, data[12] + 0x655b59c3, 6);
@@ -140,10 +153,15 @@ _nettle_md5_compress(uint32_t *digest, const uint8_t *input)
   ROUND(F4, a, b, c, d, data[ 4] + 0xf7537e82, 6);
   ROUND(F4, d, a, b, c, data[11] + 0xbd3af235, 10);
   ROUND(F4, c, d, a, b, data[ 2] + 0x2ad7d2bb, 15);
-  ROUND(F4, b, c, d, a, data[ 9] + 0xeb86d391, 21);
+  ROUND(F4, b, c, d, a, data[ 9] + 0xeb86d391, 21); DEBUG(63);
 
   digest[0] += a;
   digest[1] += b;
   digest[2] += c;
   digest[3] += d;
+#if MD5_DEBUG /* last trace line, tagged 99: the digest after the update */
+  /* digest[] is uint32_t, which need not be unsigned int: cast for %lx. */
+  fprintf(stderr, "99: %8lx %8lx %8lx %8lx\n",
+	  (unsigned long) digest[0], (unsigned long) digest[1],
+	  (unsigned long) digest[2], (unsigned long) digest[3]);
+#endif
 }
diff --git a/sha1-compress.c b/sha1-compress.c
index b8ee02fe6a865499307200ebbd23373886367565..3fc0ff91f019e1071df7bba685cc07cef0600b21 100644
--- a/sha1-compress.c
+++ b/sha1-compress.c
@@ -39,6 +39,18 @@
 # include "config.h"
 #endif
 
+#ifndef SHA1_DEBUG
+# define SHA1_DEBUG 0
+#endif
+#if SHA1_DEBUG /* DEBUG(i) prints "i: A B C D E" in hex on stderr */
+# include <stdio.h>
+# define DEBUG(i) /* casts make the arguments match %lx exactly */ \
+  fprintf(stderr, "%2d: %8lx %8lx %8lx %8lx %8lx\n", (i), \
+    (unsigned long) A, (unsigned long) B, (unsigned long) C, \
+    (unsigned long) D, (unsigned long) E)
+#else
+# define DEBUG(i)
+#endif
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -131,9 +143,10 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   D = state[3];
   E = state[4];
 
+  DEBUG(-1);
   /* Heavy mangling, in 4 sub-rounds of 20 interations each. */
-  subRound( A, B, C, D, E, f1, K1, data[ 0] );
-  subRound( E, A, B, C, D, f1, K1, data[ 1] );
+  subRound( A, B, C, D, E, f1, K1, data[ 0] ); DEBUG(0);
+  subRound( E, A, B, C, D, f1, K1, data[ 1] ); DEBUG(1);
   subRound( D, E, A, B, C, f1, K1, data[ 2] );
   subRound( C, D, E, A, B, f1, K1, data[ 3] );
   subRound( B, C, D, E, A, f1, K1, data[ 4] );
@@ -147,14 +160,14 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   subRound( D, E, A, B, C, f1, K1, data[12] );
   subRound( C, D, E, A, B, f1, K1, data[13] );
   subRound( B, C, D, E, A, f1, K1, data[14] );
-  subRound( A, B, C, D, E, f1, K1, data[15] );
-  subRound( E, A, B, C, D, f1, K1, expand( data, 16 ) );
-  subRound( D, E, A, B, C, f1, K1, expand( data, 17 ) );
-  subRound( C, D, E, A, B, f1, K1, expand( data, 18 ) );
-  subRound( B, C, D, E, A, f1, K1, expand( data, 19 ) );
-
-  subRound( A, B, C, D, E, f2, K2, expand( data, 20 ) );
-  subRound( E, A, B, C, D, f2, K2, expand( data, 21 ) );
+  subRound( A, B, C, D, E, f1, K1, data[15] ); DEBUG(15);
+  subRound( E, A, B, C, D, f1, K1, expand( data, 16 ) ); DEBUG(16);
+  subRound( D, E, A, B, C, f1, K1, expand( data, 17 ) ); DEBUG(17);
+  subRound( C, D, E, A, B, f1, K1, expand( data, 18 ) ); DEBUG(18);
+  subRound( B, C, D, E, A, f1, K1, expand( data, 19 ) ); DEBUG(19);
+
+  subRound( A, B, C, D, E, f2, K2, expand( data, 20 ) ); DEBUG(20);
+  subRound( E, A, B, C, D, f2, K2, expand( data, 21 ) ); DEBUG(21);
   subRound( D, E, A, B, C, f2, K2, expand( data, 22 ) );
   subRound( C, D, E, A, B, f2, K2, expand( data, 23 ) );
   subRound( B, C, D, E, A, f2, K2, expand( data, 24 ) );
@@ -171,11 +184,11 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   subRound( A, B, C, D, E, f2, K2, expand( data, 35 ) );
   subRound( E, A, B, C, D, f2, K2, expand( data, 36 ) );
   subRound( D, E, A, B, C, f2, K2, expand( data, 37 ) );
-  subRound( C, D, E, A, B, f2, K2, expand( data, 38 ) );
-  subRound( B, C, D, E, A, f2, K2, expand( data, 39 ) );
+  subRound( C, D, E, A, B, f2, K2, expand( data, 38 ) ); DEBUG(38);
+  subRound( B, C, D, E, A, f2, K2, expand( data, 39 ) ); DEBUG(39);
 
-  subRound( A, B, C, D, E, f3, K3, expand( data, 40 ) );
-  subRound( E, A, B, C, D, f3, K3, expand( data, 41 ) );
+  subRound( A, B, C, D, E, f3, K3, expand( data, 40 ) ); DEBUG(40);
+  subRound( E, A, B, C, D, f3, K3, expand( data, 41 ) ); DEBUG(41);
   subRound( D, E, A, B, C, f3, K3, expand( data, 42 ) );
   subRound( C, D, E, A, B, f3, K3, expand( data, 43 ) );
   subRound( B, C, D, E, A, f3, K3, expand( data, 44 ) );
@@ -192,11 +205,11 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   subRound( A, B, C, D, E, f3, K3, expand( data, 55 ) );
   subRound( E, A, B, C, D, f3, K3, expand( data, 56 ) );
   subRound( D, E, A, B, C, f3, K3, expand( data, 57 ) );
-  subRound( C, D, E, A, B, f3, K3, expand( data, 58 ) );
-  subRound( B, C, D, E, A, f3, K3, expand( data, 59 ) );
+  subRound( C, D, E, A, B, f3, K3, expand( data, 58 ) ); DEBUG(58);
+  subRound( B, C, D, E, A, f3, K3, expand( data, 59 ) ); DEBUG(59);
 
-  subRound( A, B, C, D, E, f4, K4, expand( data, 60 ) );
-  subRound( E, A, B, C, D, f4, K4, expand( data, 61 ) );
+  subRound( A, B, C, D, E, f4, K4, expand( data, 60 ) ); DEBUG(60);
+  subRound( E, A, B, C, D, f4, K4, expand( data, 61 ) ); DEBUG(61);
   subRound( D, E, A, B, C, f4, K4, expand( data, 62 ) );
   subRound( C, D, E, A, B, f4, K4, expand( data, 63 ) );
   subRound( B, C, D, E, A, f4, K4, expand( data, 64 ) );
@@ -213,8 +226,8 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   subRound( A, B, C, D, E, f4, K4, expand( data, 75 ) );
   subRound( E, A, B, C, D, f4, K4, expand( data, 76 ) );
   subRound( D, E, A, B, C, f4, K4, expand( data, 77 ) );
-  subRound( C, D, E, A, B, f4, K4, expand( data, 78 ) );
-  subRound( B, C, D, E, A, f4, K4, expand( data, 79 ) );
+  subRound( C, D, E, A, B, f4, K4, expand( data, 78 ) ); DEBUG(78);
+  subRound( B, C, D, E, A, f4, K4, expand( data, 79 ) ); DEBUG(79);
 
   /* Build message digest */
   state[0] += A;
@@ -222,4 +235,9 @@ _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
   state[2] += C;
   state[3] += D;
   state[4] += E;
+#if SHA1_DEBUG /* last trace line, tagged 99; uint32_t state cast for %lx */
+  fprintf(stderr, "99: %8lx %8lx %8lx %8lx %8lx\n",
+    (unsigned long) state[0], (unsigned long) state[1], (unsigned long) state[2],
+    (unsigned long) state[3], (unsigned long) state[4]);
+#endif
 }