Skip to content
Snippets Groups Projects
Commit 1bc61893 authored by Niels Möller
Browse files

(ROL32): Renamed macro (was "rol"). Deleted x86 version using inline
assembly; at least gcc-4.4.5 recognizes shift-and-or expressions which
are in fact rotations.
(_nettle_ripemd160_compress): Use LE_READ_UINT32.

Rev: nettle/ripemd160-compress.c:1.2
parent 4b10881d
No related branches found
No related tags found
No related merge requests found
...@@ -21,21 +21,14 @@ ...@@ -21,21 +21,14 @@
#include "ripemd160.h" #include "ripemd160.h"
#include "macros.h"
/**************** /****************
* Rotate the 32 bit unsigned integer X by N bits left/right * Rotate the 32 bit unsigned integer X by N bits left
*/ */
#if defined(__GNUC__) && defined(__i386__)
static inline uint32_t #define ROL32(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )
rol(uint32_t x, int n)
{
__asm__("roll %%cl,%0"
:"=r" (x)
:"0" (x),"c" (n));
return x;
}
#else
#define rol(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )
#endif
/**************** /****************
* Transform the message X which consists of 16 32-bit-words * Transform the message X which consists of 16 32-bit-words
...@@ -45,29 +38,18 @@ _nettle_ripemd160_compress(uint32_t *state, const uint8_t *data) ...@@ -45,29 +38,18 @@ _nettle_ripemd160_compress(uint32_t *state, const uint8_t *data)
{ {
register uint32_t a,b,c,d,e; register uint32_t a,b,c,d,e;
uint32_t aa,bb,cc,dd,ee,t; uint32_t aa,bb,cc,dd,ee,t;
#ifdef WORDS_BIGENDIAN
uint32_t x[16]; uint32_t x[16];
#ifdef WORDS_BIGENDIAN
{ {
int i; int i;
uint8_t *p2, *p1; for (i=0; i < 16; i++, data += 4 )
for (i=0, p1=data, p2=(uint8_t*)x; i < 16; i++, p2 += 4 ) x[i] = LE_READ_UINT32(data);
{
p2[3] = *p1++;
p2[2] = *p1++;
p2[1] = *p1++;
p2[0] = *p1++;
}
} }
#else #else
/* This version is better because it is always aligned; /* memcpy seems a bit faster. Benchmarked on Intel SU4100, it makes
* The performance penalty on a 586-100 is about 6% which the entire update function roughly 6% faster. */
* is acceptable - because the data is more local it might memcpy(x, data, sizeof(x));
* also be possible that this is faster on some machines.
* This function (when compiled with -02 on gcc 2.7.2)
* executes on a 586-100 (39.73 bogomips) at about 1900kb/sec;
* [measured with a 4MB data and "gpgm --print-md rmd160"] */
uint32_t x[16];
memcpy(x, data, 64);
#endif #endif
...@@ -87,8 +69,8 @@ _nettle_ripemd160_compress(uint32_t *state, const uint8_t *data) ...@@ -87,8 +69,8 @@ _nettle_ripemd160_compress(uint32_t *state, const uint8_t *data)
#define F3(x,y,z) ( ((x) & (z)) | ((y) & ~(z)) ) #define F3(x,y,z) ( ((x) & (z)) | ((y) & ~(z)) )
#define F4(x,y,z) ( (x) ^ ((y) | ~(z)) ) #define F4(x,y,z) ( (x) ^ ((y) | ~(z)) )
#define R(a,b,c,d,e,f,k,r,s) do { t = a + f(b,c,d) + k + x[r]; \ #define R(a,b,c,d,e,f,k,r,s) do { t = a + f(b,c,d) + k + x[r]; \
a = rol(t,s) + e; \ a = ROL32(t,s) + e; \
c = rol(c,10); \ c = ROL32(c,10); \
} while(0) } while(0)
/* left lane */ /* left lane */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment