Commit 0325eaf3 authored by Niels Möller

CTR mode optimizations for 16-byte block size.

parent e09ed92e
@@ -9,6 +9,13 @@
2018-01-08 Niels Möller <nisse@lysator.liu.se>
* ctr.c (ctr_crypt16): New function, with optimizations specific
to 16-byte block size.
(ctr_fill16): New helper function, definition depending on
WORDS_BIGENDIAN, and little endian version requiring
HAVE_BUILTIN_BSWAP64.
(ctr_crypt): Use ctr_crypt16, when appropriate.
* nettle-types.h (union nettle_block16): Add uint64_t field.
* configure.ac: Check for __builtin_bswap64, define
......
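The optimization treats the 16-byte counter as a 128-bit big-endian integer held in two 64-bit halves; the carry trick hi += !(++lo) relies on well-defined unsigned wraparound. A minimal standalone sketch of that logic (not part of the commit; the name increment128 is hypothetical):

#include <stdint.h>
#include <assert.h>

/* Increment a 128-bit counter split into two uint64_t halves.
   ++lo wraps to 0 exactly on overflow (unsigned wraparound is
   well-defined), so !(++lo) is the carry into the high half. */
static void
increment128(uint64_t *hi, uint64_t *lo)
{
  *hi += !(++*lo);
}

int
main(void)
{
  uint64_t hi = 0, lo = UINT64_MAX;
  increment128(&hi, &lo);
  assert(hi == 1 && lo == 0);  /* overflow in lo carries into hi */
  increment128(&hi, &lo);
  assert(hi == 1 && lo == 1);  /* ordinary increment, no carry */
  return 0;
}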
@@ -62,36 +62,128 @@ ctr_fill (size_t block_size, uint8_t *ctr, size_t length, uint8_t *buffer)
return i;
}
#if WORDS_BIGENDIAN
# define USE_CTR_CRYPT16 1
static void
ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
{
uint64_t hi, lo;
hi = READ_UINT64(ctr);
lo = READ_UINT64(ctr + 8);
while (blocks-- > 0)
{
*buffer++ = hi;
*buffer++ = lo;
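      /* ++lo wraps to 0 exactly when lo overflows, so !(++lo) is the
         carry out of the low half. */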
hi += !(++lo);
}
WRITE_UINT64(ctr, hi);
WRITE_UINT64(ctr + 8, lo);
}
#else /* !WORDS_BIGENDIAN */
# if HAVE_BUILTIN_BSWAP64
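/* Assume __builtin_bswap64 is reasonably efficient */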
# define USE_CTR_CRYPT16 1
static void
ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
{
uint64_t hi, lo;
/* Keep hi as stored (byte-swapped on this little-endian host), and
   read lo into numeric order so it can be incremented directly. */
hi = LE_READ_UINT64(ctr);
lo = READ_UINT64(ctr + 8);
while (blocks-- > 0)
{
*buffer++ = hi;
*buffer++ = __builtin_bswap64(lo);
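      /* lo is in numeric order and can be incremented directly; hi is
         kept byte-swapped, so the carry path swaps it to numeric order,
         increments, and swaps back. */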
if (!++lo)
hi = __builtin_bswap64(__builtin_bswap64(hi) + 1);
}
LE_WRITE_UINT64(ctr, hi);
WRITE_UINT64(ctr + 8, lo);
}
# else /* ! HAVE_BUILTIN_BSWAP64 */
# define USE_CTR_CRYPT16 0
# endif
#endif /* !WORDS_BIGENDIAN */
#if USE_CTR_CRYPT16
static size_t
ctr_crypt16(const void *ctx, nettle_cipher_func *f,
uint8_t *ctr,
size_t length, uint8_t *dst,
const uint8_t *src)
{
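  /* When dst is 64-bit aligned and distinct from src, the counter
     values can be written directly into dst and encrypted in place. */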
if (dst != src && !((uintptr_t) dst % sizeof(uint64_t)))
{
size_t blocks = length / 16u;
ctr_fill16 (ctr, blocks, (uint64_t *) dst);
f(ctx, blocks * 16, dst, dst);
memxor (dst, src, blocks * 16);
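      /* Any trailing partial block is left to the generic path in
         ctr_crypt, which is why this may return less than length. */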
return blocks * 16;
}
else
{
/* Construct an aligned buffer of consecutive counter values, of
size at most CTR_BUFFER_LIMIT. */
TMP_DECL(buffer, union nettle_block16, CTR_BUFFER_LIMIT / 16);
size_t blocks = (length + 15) / 16u;
size_t i;
TMP_ALLOC(buffer, MIN(blocks, CTR_BUFFER_LIMIT / 16));
for (i = 0; blocks >= CTR_BUFFER_LIMIT / 16;
i += CTR_BUFFER_LIMIT, blocks -= CTR_BUFFER_LIMIT / 16)
{
ctr_fill16 (ctr, CTR_BUFFER_LIMIT / 16, buffer->u64);
f(ctx, CTR_BUFFER_LIMIT, buffer->b, buffer->b);
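      /* If fewer than CTR_BUFFER_LIMIT bytes remain, share the tail
         memxor3 below, which xors only the length - i bytes left. */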
if (length - i < CTR_BUFFER_LIMIT)
goto done;
memxor3 (dst + i, src + i, buffer->b, CTR_BUFFER_LIMIT);
}
if (blocks > 0)
{
assert (length - i < CTR_BUFFER_LIMIT);
ctr_fill16 (ctr, blocks, buffer->u64);
f(ctx, blocks * 16, buffer->b, buffer->b);
done:
memxor3 (dst + i, src + i, buffer->b, length - i);
}
return length;
}
}
#endif /* USE_CTR_CRYPT16 */
void
ctr_crypt(const void *ctx, nettle_cipher_func *f,
size_t block_size, uint8_t *ctr,
size_t length, uint8_t *dst,
const uint8_t *src)
{
#if USE_CTR_CRYPT16
  if (block_size == 16)
    {
      size_t done = ctr_crypt16(ctx, f, ctr, length, dst, src);
      length -= done;
      src += done;
      dst += done;
    }
#endif

  if (src != dst)
    {
      size_t filled = ctr_fill (block_size, ctr, length, dst);

      f(ctx, filled, dst, dst);
      memxor(dst, src, filled);

      if (filled < length)
        {
          TMP_DECL(block, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
          TMP_ALLOC(block, block_size);

          f(ctx, block_size, block, ctr);
          INCREMENT(block_size, ctr);
          memxor3(dst + filled, src + filled, block, length - filled);
        }
    }
  else
......
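The public ctr_crypt interface is unchanged, so any caller using a 16-byte block cipher picks up the new path automatically. A sketch of typical usage with AES-128 (the zero key and counter are placeholders, not a worked test vector):

#include <nettle/aes.h>
#include <nettle/ctr.h>

int
main(void)
{
  struct aes128_ctx aes;
  const uint8_t key[AES128_KEY_SIZE] = {0};  /* placeholder key */
  uint8_t ctr[AES_BLOCK_SIZE] = {0};         /* placeholder initial counter */
  uint8_t msg[100] = "example plaintext";
  uint8_t out[sizeof(msg)];

  aes128_set_encrypt_key(&aes, key);
  /* block_size == 16, so this dispatches to ctr_crypt16. */
  ctr_crypt(&aes, (nettle_cipher_func *) aes128_encrypt,
            AES_BLOCK_SIZE, ctr, sizeof(msg), out, msg);
  return 0;
}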