Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Nettle
nettle
Commits
be1f8dc8
Commit
be1f8dc8
authored
Jan 14, 2018
by
Niels Möller
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'ctr-opt' into master-updates
parents
7560b387
0325eaf3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
189 additions
and
67 deletions
+189
-67
ChangeLog
ChangeLog
+18
-0
configure.ac
configure.ac
+16
-0
ctr.c
ctr.c
+154
-67
nettle-types.h
nettle-types.h
+1
-0
No files found.
ChangeLog
View file @
be1f8dc8
...
...
@@ -9,6 +9,24 @@
2018-01-08 Niels Möller <nisse@lysator.liu.se>
* ctr.c (ctr_crypt16): New function, with optimizations specific
to 16-byte block size.
(ctr_fill16): New helper function, definition depending on
WORDS_BIGENDIAN, and little endian version requiring
HAVE_BUILTIN_BSWAP64.
(ctr_crypt): Use ctr_crypt16, when appropriate.
* nettle-types.h (union nettle_block16): Add uint64_t field.
* configure.ac: Check for __builtin_bswap64, define
HAVE_BUILTIN_BSWAP64 if available.
* ctr.c (ctr_fill): New function. Use in ctr_crypt.
* ctr.c (ctr_crypt): For in-place operation, increase max buffer
size from 4 blocks to 512 bytes, similarly to CBC and CFB.
Improves in-place aes128 CTR performance by 25% on x86_64.
* examples/nettle-benchmark.c (time_cipher): Benchmark in-place
operation separately, for cbc_decrypt and ctr_crypt.
...
...
configure.ac
View file @
be1f8dc8
...
...
@@ -203,6 +203,22 @@ LSH_FUNC_STRERROR
AC_CHECK_FUNCS(secure_getenv getline)
AC_C_BIGENDIAN
AC_CACHE_CHECK([for __builtin_bswap64],
nettle_cv_c_builtin_bswap64,
[AC_TRY_COMPILE([
#include <stdint.h>
],[
uint64_t x = 17;
uint64_t y = __builtin_bswap64(x);
],
nettle_cv_c_builtin_bswap64=yes,
nettle_cv_c_builtin_bswap64=no)])
AH_TEMPLATE([HAVE_BUILTIN_BSWAP64], [Define if __builtin_bswap64 is available])
if test "x$nettle_cv_c_builtin_bswap64" = "xyes" ; then
AC_DEFINE(HAVE_BUILTIN_BSWAP64)
fi
LSH_GCC_ATTRIBUTES
# According to Simon Josefsson, looking for uint32_t and friends in
...
...
ctr.c
View file @
be1f8dc8
...
...
@@ -45,7 +45,113 @@
#include "memxor.h"
#include "nettle-internal.h"
#define NBLOCKS 4
/* Don't allocate any more space than this on the stack */
#define CTR_BUFFER_LIMIT 512
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
static
size_t
ctr_fill
(
size_t
block_size
,
uint8_t
*
ctr
,
size_t
length
,
uint8_t
*
buffer
)
{
size_t
i
;
for
(
i
=
0
;
i
+
block_size
<=
length
;
i
+=
block_size
)
{
memcpy
(
buffer
+
i
,
ctr
,
block_size
);
INCREMENT
(
block_size
,
ctr
);
}
return
i
;
}
#if WORDS_BIGENDIAN
# define USE_CTR_CRYPT16 1
static
void
ctr_fill16
(
uint8_t
*
ctr
,
size_t
blocks
,
uint64_t
*
buffer
)
{
uint64_t
hi
,
lo
;
hi
=
READ_UINT64
(
ctr
);
lo
=
READ_UINT64
(
ctr
+
8
);
while
(
blocks
--
>
0
)
{
*
buffer
++
=
hi
;
*
buffer
++
=
lo
;
hi
+=
!
(
++
lo
);
}
WRITE_UINT64
(
ctr
,
hi
);
WRITE_UINT64
(
ctr
+
8
,
lo
);
}
#else
/* !WORDS_BIGENDIAN */
# if HAVE_BUILTIN_BSWAP64
# define USE_CTR_CRYPT16 1
static
void
ctr_fill16
(
uint8_t
*
ctr
,
size_t
blocks
,
uint64_t
*
buffer
)
{
uint64_t
hi
,
lo
;
/* Read hi in native endianness */
hi
=
LE_READ_UINT64
(
ctr
);
lo
=
READ_UINT64
(
ctr
+
8
);
while
(
blocks
--
>
0
)
{
*
buffer
++
=
hi
;
*
buffer
++
=
__builtin_bswap64
(
lo
);
if
(
!++
lo
)
hi
=
__builtin_bswap64
(
__builtin_bswap64
(
hi
)
+
1
);
}
LE_WRITE_UINT64
(
ctr
,
hi
);
WRITE_UINT64
(
ctr
+
8
,
lo
);
}
# else
/* ! HAVE_BUILTIN_BSWAP64 */
# define USE_CTR_CRYPT16 0
# endif
#endif
/* !WORDS_BIGENDIAN */
#if USE_CTR_CRYPT16
static
size_t
ctr_crypt16
(
const
void
*
ctx
,
nettle_cipher_func
*
f
,
uint8_t
*
ctr
,
size_t
length
,
uint8_t
*
dst
,
const
uint8_t
*
src
)
{
if
(
dst
!=
src
&&
!
((
uintptr_t
)
dst
%
sizeof
(
uint64_t
)))
{
size_t
blocks
=
length
/
16u
;
ctr_fill16
(
ctr
,
blocks
,
(
uint64_t
*
)
dst
);
f
(
ctx
,
blocks
*
16
,
dst
,
dst
);
memxor
(
dst
,
src
,
blocks
*
16
);
return
blocks
*
16
;
}
else
{
/* Construct an aligned buffer of consecutive counter values, of
size at most CTR_BUFFER_LIMIT. */
TMP_DECL
(
buffer
,
union
nettle_block16
,
CTR_BUFFER_LIMIT
/
16
);
size_t
blocks
=
(
length
+
15
)
/
16u
;
size_t
i
;
TMP_ALLOC
(
buffer
,
MIN
(
blocks
,
CTR_BUFFER_LIMIT
/
16
));
for
(
i
=
0
;
blocks
>=
CTR_BUFFER_LIMIT
/
16
;
i
+=
CTR_BUFFER_LIMIT
,
blocks
-=
CTR_BUFFER_LIMIT
/
16
)
{
ctr_fill16
(
ctr
,
CTR_BUFFER_LIMIT
/
16
,
buffer
->
u64
);
f
(
ctx
,
CTR_BUFFER_LIMIT
,
buffer
->
b
,
buffer
->
b
);
if
(
length
-
i
<
CTR_BUFFER_LIMIT
)
goto
done
;
memxor3
(
dst
,
src
,
buffer
->
b
,
CTR_BUFFER_LIMIT
);
}
if
(
blocks
>
0
)
{
assert
(
length
-
i
<
CTR_BUFFER_LIMIT
);
ctr_fill16
(
ctr
,
blocks
,
buffer
->
u64
);
f
(
ctx
,
blocks
*
16
,
buffer
->
b
,
buffer
->
b
);
done:
memxor3
(
dst
+
i
,
src
+
i
,
buffer
->
b
,
length
-
i
);
}
return
length
;
}
}
#endif
/* USE_CTR_CRYPT16 */
void
ctr_crypt
(
const
void
*
ctx
,
nettle_cipher_func
*
f
,
...
...
@@ -53,84 +159,65 @@ ctr_crypt(const void *ctx, nettle_cipher_func *f,
size_t
length
,
uint8_t
*
dst
,
const
uint8_t
*
src
)
{
if
(
src
!=
dst
)
#if USE_CTR_CRYPT16
if
(
block_size
==
16
)
{
size_t
done
=
ctr_crypt16
(
ctx
,
f
,
ctr
,
length
,
dst
,
src
);
length
-=
done
;
src
+=
done
;
dst
+=
done
;
}
#endif
if
(
src
!=
dst
)
{
if
(
length
==
block_size
)
size_t
filled
=
ctr_fill
(
block_size
,
ctr
,
length
,
dst
);
f
(
ctx
,
filled
,
dst
,
dst
);
memxor
(
dst
,
src
,
filled
);
if
(
filled
<
length
)
{
f
(
ctx
,
block_size
,
dst
,
ctr
);
TMP_DECL
(
block
,
uint8_t
,
NETTLE_MAX_CIPHER_BLOCK_SIZE
);
TMP_ALLOC
(
block
,
block_size
);
f
(
ctx
,
block_size
,
block
,
ctr
);
INCREMENT
(
block_size
,
ctr
);
memxor
(
dst
,
src
,
block_size
);
}
else
{
size_t
left
;
uint8_t
*
p
;
for
(
p
=
dst
,
left
=
length
;
left
>=
block_size
;
left
-=
block_size
,
p
+=
block_size
)
{
memcpy
(
p
,
ctr
,
block_size
);
INCREMENT
(
block_size
,
ctr
);
}
f
(
ctx
,
length
-
left
,
dst
,
dst
);
memxor
(
dst
,
src
,
length
-
left
);
if
(
left
)
{
TMP_DECL
(
buffer
,
uint8_t
,
NETTLE_MAX_CIPHER_BLOCK_SIZE
);
TMP_ALLOC
(
buffer
,
block_size
);
f
(
ctx
,
block_size
,
buffer
,
ctr
);
INCREMENT
(
block_size
,
ctr
);
memxor3
(
dst
+
length
-
left
,
src
+
length
-
left
,
buffer
,
left
);
}
memxor3
(
dst
+
filled
,
src
+
filled
,
block
,
length
-
filled
);
}
}
else
{
if
(
length
>
block_size
)
/* For in-place CTR, construct a buffer of consecutive counter
values, of size at most CTR_BUFFER_LIMIT. */
TMP_DECL
(
buffer
,
uint8_t
,
CTR_BUFFER_LIMIT
);
size_t
buffer_size
;
if
(
length
<
block_size
)
buffer_size
=
block_size
;
else
if
(
length
<=
CTR_BUFFER_LIMIT
)
buffer_size
=
length
;
else
buffer_size
=
CTR_BUFFER_LIMIT
;
TMP_ALLOC
(
buffer
,
buffer_size
);
while
(
length
>=
block_size
)
{
TMP_DECL
(
buffer
,
uint8_t
,
NBLOCKS
*
NETTLE_MAX_CIPHER_BLOCK_SIZE
);
size_t
chunk
=
NBLOCKS
*
block_size
;
TMP_ALLOC
(
buffer
,
chunk
);
for
(;
length
>=
chunk
;
length
-=
chunk
,
src
+=
chunk
,
dst
+=
chunk
)
{
unsigned
n
;
uint8_t
*
p
;
for
(
n
=
0
,
p
=
buffer
;
n
<
NBLOCKS
;
n
++
,
p
+=
block_size
)
{
memcpy
(
p
,
ctr
,
block_size
);
INCREMENT
(
block_size
,
ctr
);
}
f
(
ctx
,
chunk
,
buffer
,
buffer
);
memxor
(
dst
,
buffer
,
chunk
);
}
if
(
length
>
0
)
{
/* Final, possibly partial, blocks */
for
(
chunk
=
0
;
chunk
<
length
;
chunk
+=
block_size
)
{
memcpy
(
buffer
+
chunk
,
ctr
,
block_size
);
INCREMENT
(
block_size
,
ctr
);
}
f
(
ctx
,
chunk
,
buffer
,
buffer
);
memxor3
(
dst
,
src
,
buffer
,
length
);
}
size_t
filled
=
ctr_fill
(
block_size
,
ctr
,
MIN
(
buffer_size
,
length
),
buffer
);
assert
(
filled
>
0
);
f
(
ctx
,
filled
,
buffer
,
buffer
);
memxor
(
dst
,
buffer
,
filled
);
length
-=
filled
;
dst
+=
filled
;
}
else
if
(
length
>
0
)
{
TMP_DECL
(
buffer
,
uint8_t
,
NETTLE_MAX_CIPHER_BLOCK_SIZE
);
TMP_ALLOC
(
buffer
,
block_size
);
/* Final, possibly partial, block. */
if
(
length
>
0
)
{
f
(
ctx
,
block_size
,
buffer
,
ctr
);
INCREMENT
(
block_size
,
ctr
);
memxor
3
(
dst
,
src
,
buffer
,
length
);
memxor
(
dst
,
buffer
,
length
);
}
}
}
nettle-types.h
View file @
be1f8dc8
...
...
@@ -48,6 +48,7 @@ union nettle_block16
{
uint8_t
b
[
16
];
unsigned
long
w
[
16
/
sizeof
(
unsigned
long
)];
uint64_t
u64
[
2
];
};
/* Randomness. Used by key generation and dsa signature creation. */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment