Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Brian Smith
nettle
Commits
c187393c
Commit
c187393c
authored
Oct 16, 2005
by
Niels Möller
Browse files
Deleted old aes implementation.
Rev: src/nettle/sparc/aes.asm:1.127(DEAD)
parent
3456614a
Changes
1
Hide whitespace changes
Inline
Side-by-side
sparc/aes.asm
deleted
100644 → 0
View file @
3456614a
!
-*-
mode:
asm
; asm-comment-char: ?!; -*-
!
nettle
,
low
-
level
cryptographics
library
!
!
Copyright
(
C
)
2002
Niels
Möller
!
!
The
nettle
library
is
free
software
; you can redistribute it and/or modify
!
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
!
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
!
option
)
any
later
version.
!
!
The
nettle
library
is
distributed
in
the
hope
that
it
will
be
useful
,
but
!
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
!
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
!
License
for
more
details.
!
!
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
!
along
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
!
the
Free
Software
Foundation
,
Inc.
,
59
Temple
Place
-
Suite
330
,
Boston
,
!
MA
02111
-
1307
,
USA.
!
FIXME:
For
improved
ultrasparc
performance
,
we
should
avoid
AL
U
!
instructions
that
use
the
result
of
an
immediately
preceding
AL
U
!
instruction.
It
is
also
a
good
idea
to
have
a
greater
distance
than
!
one
instruction
between
a
load
and
use
of
its
value
,
as
that
reduces
!
the
penalty
for
cache
misses.
Such
instruction
sequences
are
marked
!
with
!
U
comments.
!
NOTE:
Some
of
the
%
g
registers
are
reserved
for
operating
system
etc
!
(
see
gcc
/
config
/
sparc.h
)
.
The
only
%
g
registers
that
seem
safe
to
!
use
are
%
g1
-%
g3.
C
FIXME
:
Use
separate
code
for
encryption
and
decryption
,
to
avoid
the
IDX
lookups.
C
Put
AES
state
in
registers.
If
possible
,
use
two
register
sets
and
unroll
the
loop
twice.
C
On
sparc64
,
investigate
if
we
can
do
two
blocks
in
parallel
,
using
C
the
upper
and
lower
parts
of
the
registers
for
different
blocks.
C
It
seems
hard
to
do
the
byte
indexing
in
parallel
though.
!
Used
registers
:
%
l0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
!
%
i0
,
1
,
2
,
3
,
4
(
%
i6
=
%
fp
,
%
i7
=
return
)
!
%
o0
,
1
,
2
,
3
,
4
(
%
o6
=
%
sp
)
!
.file	"aes.asm"

C Symbolic names for the registers used by _nettle_aes_crypt and by the
C AES_LOAD, AES_ROUND and AES_FINAL_ROUND macros. After these defines,
C m4 replaces each name with the SPARC register given here.

! Arguments
C ctx:    base address for the subkey words and the AES_NROUNDS field
C T:      base address of the lookup tables (AES_TABLE0..3, AES_SIDX1/3)
C length: remaining byte count, decreased by 16 for each block
C dst:    output pointer, advanced by 16 for each block
C src:    input pointer, advanced by 16 for each block
define(ctx, %i0)
define(T, %i1)
define(length, %i2)
define(dst, %i3)
define(src, %i4)

! Loop invariants
C wtxt and tmp point at two 16-byte buffers in the stack frame; diff is
C wtxt XOR tmp, so that XORing either pointer with diff yields the other.
define(wtxt, %l0)
define(tmp, %l1)
define(diff, %l2)
define(nrounds, %l3)

! Further loop invariants
C Tn = T + AES_TABLEn, computed once on function entry.
define(T0, %l4)
define(T1, %l5)
define(T2, %l6)
define(T3, %l7)

! Temporaries
C Scratch registers, overwritten by every macro invocation.
define(t0, %o0)
define(t1, %o1)
define(t2, %o2)

! Loop variables
C round counts down the remaining rounds; key points at the subkey of
C the current round and is advanced by 16 after each round.
define(round, %o3)
define(key, %o4)

C IDX1 contains the permutation values * 4 + 2
define(IDX1, <T + AES_SIDX1>)

C IDX3 contains the permutation values * 4
define(IDX3, <T + AES_SIDX3>)
C AES_LOAD(i)
C Get one word of input, XOR with first subkey, store in wtxt
C
C The argument is the byte offset of the word within the block; the
C caller uses 0, 4, 8 and 12.
C The four input bytes are read one at a time with ldub and combined so
C that the byte at the lowest address ends up in the least significant
C bits (shifts of 0, 8, 16 and 24). The resulting word is XORed with
C the word at the same offset from ctx and stored at the same offset
C from wtxt.
C Overwrites t0, t1 and t2.
define(<AES_LOAD>, <
	ldub	[src+$1], t0
	ldub	[src+$1+1], t1
	ldub	[src+$1+2], t2
	sll	t1, 8, t1

	or	t0, t1, t0	! U
	ldub	[src+$1+3], t1
	sll	t2, 16, t2
	or	t0, t2, t0

	sll	t1, 24, t1
	! Get subkey
	ld	[ctx+$1], t2
	or	t0, t1, t0
	xor	t0, t2, t0

	st	t0, [wtxt+$1]>)dnl
C AES_ROUND(i)
C Compute one word in the round function.
C Input in wtxt, output stored in tmp + i.
C
C The comments mark which j in T->table[j][Bj(wtxt[IDXi(i)])]
C the instruction is a part of.
C
C The argument is the byte offset of the output word; the caller uses
C 0, 4, 8 and 12. Four state bytes are fetched from wtxt:
C   j = 0: the byte at offset i + 3,
C   j = 1: the byte at the offset read from the IDX1 table entry for i,
C   j = 2: the byte at offset (i XOR 8) + 1,
C   j = 3: the byte at the offset read from the IDX3 table entry for i.
C Each byte is multiplied by 4 (sll by 2) and used to load one word
C from T0, T1, T2 and T3 respectively. The four words and the round key
C word at key + i are XORed together and the result is stored at
C tmp + i.
C Overwrites t0, t1 and t2. The loads and ALU operations for the
C different j are interleaved; lines marked U are the dependent
C sequences that the FIXME at the top of the file refers to.
define(<AES_ROUND>, <
	ld	[IDX1+$1], t1		! 1
	ldub	[wtxt+$1+3], t0		! 0
	ldub	[wtxt+t1], t1		! 1
	sll	t0, 2, t0		! 0

	ld	[T0+t0], t0		! 0
	sll	t1, 2, t1		! 1
	ld	[T1+t1], t1		! 1 !U
	ld	[IDX3+$1], t2		! 3

	xor	t0, t1, t0		! 0, 1
	! IDX2(j) = j XOR 2
	ldub	[wtxt+eval($1 ^ 8)+1], t1	! 2
	ldub	[wtxt+t2], t2		! 3
	sll	t1, 2, t1		! 2

	ld	[T2+t1], t1		! 2 !U
	sll	t2, 2, t2		! 3
	ld	[T3+t2], t2		! 3 !U
	xor	t0, t1, t0		! 0, 1, 2

	! Fetch roundkey
	ld	[key+$1], t1
	xor	t0, t2, t0		! 0, 1, 2, 3
	xor	t0, t1, t0	! U
	st	t0, [tmp+$1]>)dnl
C AES_FINAL_ROUND(i)
C Compute one word in the final round function.
C Input in wtxt, output converted to an octet string and stored at dst.
C
C The comments mark which j in T->table[j][Bj(wtxt[IDXi(i)])]
C the instruction is a part of.
C
C The argument is the byte offset of the output word; the caller uses
C 0, 4, 8 and 12. The same four state bytes as in AES_ROUND are fetched
C from wtxt, but each one is used as a byte index directly from T
C (ldub, no scaling), giving one substituted byte per j. The bytes are
C combined with shifts of 0, 8, 16 and 24 for j = 0, 1, 2, 3, the word
C is XORed with the round key word at key + i, and the result is
C written to dst + i one byte at a time, least significant byte at the
C lowest address.
C Overwrites t0, t1 and t2.
define(<AES_FINAL_ROUND>, <
	ld	[IDX1+$1], t1	! 1
	ldub	[wtxt+$1+3], t0	! 0
	ldub	[wtxt+t1], t1	! 1
	ldub	[T+t0], t0	! 0

	ldub	[T+t1], t1	! 1
	ld	[IDX3+$1], t2	! 3
	sll	t1, 8, t1	! 1
	or	t0, t1, t0	! 0, 1 !U

	! IDX2(j) = j XOR 2
	ldub	[wtxt+eval($1 ^ 8)+1], t1	! 2
	ldub	[wtxt+t2], t2	! 3
	ldub	[T+t1], t1	! 2
	ldub	[T+t2], t2	! 3

	sll	t1, 16, t1	! 2
	or	t0, t1, t0	! 0, 1, 2 !U
	sll	t2, 24, t2	! 3
	ld	[key+$1], t1

	or	t0, t2, t0	! 0, 1, 2, 3
	xor	t0, t1, t0	! U
	srl	t0, 24, t1	! U
	stb	t1, [dst+$1+3]	! U

	srl	t0, 16, t1
	stb	t1, [dst+$1+2]	! U
	srl	t0, 8, t1
	stb	t1, [dst+$1+1]	! U

	stb	t0, [dst+$1]>)dnl
C The stack frame looks like
C
C %fp -   4: OS-dependent link field
C %fp -   8: OS-dependent link field
C %fp -  24: wtxt, uint32_t[4]
C %fp -  40: tmp, uint32_t[4]
C %fp - 136: OS register save area.
C
C The wtxt and tmp assignments above are the initial ones, as set up on
C function entry; the two pointers are exchanged after every round.
define(<FRAME_SIZE>, 136)

	.section	".text"
	.align 16
	.global _nettle_aes_crypt
	.type	_nettle_aes_crypt,#function
	.proc	020

C _nettle_aes_crypt
C
C Processes length bytes from src to dst in blocks of 16 bytes, using
C the subkeys and round count found via ctx and the lookup tables found
C via T (register assignments are given by the defines earlier in the
C file). Returns immediately when length is zero.
C NOTE(review): the block loop terminates only when length reaches
C exactly zero, so length is presumably required to be a multiple of
C 16; confirm against the callers.
_nettle_aes_crypt:

	save	%sp, -FRAME_SIZE, %sp
	cmp	length, 0
	be	.Lend
	C The add below sits in the delay slot of the branch above, so it
	C is executed whether or not the branch is taken.
	! wtxt
	add	%fp, -24, wtxt

	add	%fp, -40, tmp
	ld	[ctx + AES_NROUNDS], nrounds
	! Compute xor, so that we can swap efficiently.
	xor	wtxt, tmp, diff
	! The loop variable will be multiplied by 16.
	! More loop invariants
	add	T, AES_TABLE0, T0

	add	T, AES_TABLE1, T1
	add	T, AES_TABLE2, T2
	add	T, AES_TABLE3, T3
	nop

.Lblock_loop:
	C Read src, and add initial subkey
	AES_LOAD(0)	! i = 0
	AES_LOAD(4)	! i = 1
	AES_LOAD(8)	! i = 2
	AES_LOAD(12)	! i = 3
	add	src, 16, src

	C The round loop below runs nrounds - 1 times; the last round is
	C done separately by AES_FINAL_ROUND. The first 16 bytes at ctx were
	C consumed by AES_LOAD, so the round keys start at ctx + 16.
	sub	nrounds, 1, round
	add	ctx, 16, key
	nop

.Lround_loop:
	AES_ROUND(0)	! i = 0
	AES_ROUND(4)	! i = 1
	AES_ROUND(8)	! i = 2
	AES_ROUND(12)	! i = 3

	! switch roles for tmp and wtxt
	xor	wtxt, diff, wtxt
	xor	tmp, diff, tmp

	subcc	round, 1, round
	bne	.Lround_loop
	C Delay slot: advance to the next round key on every iteration,
	C including the last one, so that key is correct for the final round.
	add	key, 16, key

	C Final round, and storage of the output

	AES_FINAL_ROUND(0)	! i = 0
	AES_FINAL_ROUND(4)	! i = 1
	AES_FINAL_ROUND(8)	! i = 2
	AES_FINAL_ROUND(12)	! i = 3

	addcc	length, -16, length
	bne	.Lblock_loop
	C Delay slot: advance the output pointer past the block just written.
	add	dst, 16, dst

.Lend:
	ret
	restore
.Leord:
	.size	_nettle_aes_crypt,.Leord-_nettle_aes_crypt
!
Benchmarks
on
my
slow
sparcstation
:
!
Original
C
code
!
aes128
(
ECB
encrypt
):
14.36
s
,
0.696
MB
/
s
!
aes128
(
ECB
decrypt
):
17.19
s
,
0.582
MB
/
s
!
aes128
(
CBC
encrypt
):
16.08
s
,
0.622
MB
/
s
!
aes128
((
CBC
decrypt
)):
18.79
s
,
0.532
MB
/
s
!
!
aes192
(
ECB
encrypt
):
16.85
s
,
0.593
MB
/
s
!
aes192
(
ECB
decrypt
):
19.64
s
,
0.509
MB
/
s
!
aes192
(
CBC
encrypt
):
18.43
s
,
0.543
MB
/
s
!
aes192
(
CBC
decrypt
):
20.76
s
,
0.482
MB
/
s
!
!
aes256
(
ECB
encrypt
):
19.12
s
,
0.523
MB
/
s
!
aes256
(
ECB
decrypt
):
22.57
s
,
0.443
MB
/
s
!
aes256
(
CBC
encrypt
):
20.92
s
,
0.478
MB
/
s
!
aes256
(
CBC
decrypt
):
23.22
s
,
0.431
MB
/
s
!
After
unrolling
key_addition32
,
and
getting
rid
of
!
some
sll
x
,
2
,
x
,
encryption
speed
is
0.760
MB
/
s.
!
Next
,
the
C
code
was
optimized
to
use
larger
tables
and
!
no
rotates.
New
timings
:
!
aes128
(
ECB
encrypt
):
13.10
s
,
0.763
MB
/
s
!
aes128
(
ECB
decrypt
):
11.51
s
,
0.869
MB
/
s
!
aes128
(
CBC
encrypt
):
15.15
s
,
0.660
MB
/
s
!
aes128
(
CBC
decrypt
):
13.10
s
,
0.763
MB
/
s
!
!
aes192
(
ECB
encrypt
):
15.68
s
,
0.638
MB
/
s
!
aes192
(
ECB
decrypt
):
13.59
s
,
0.736
MB
/
s
!
aes192
(
CBC
encrypt
):
17.65
s
,
0.567
MB
/
s
!
aes192
(
CBC
decrypt
):
15.31
s
,
0.653
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.95
s
,
0.557
MB
/
s
!
aes256
(
ECB
decrypt
):
15.90
s
,
0.629
MB
/
s
!
aes256
(
CBC
encrypt
):
20.16
s
,
0.496
MB
/
s
!
aes256
(
CBC
decrypt
):
17.47
s
,
0.572
MB
/
s
!
After
optimization
using
pre
-
shifted
indices
!
(
AES_SIDX
[
1
-
3
]):
!
aes128
(
ECB
encrypt
):
12.46
s
,
0.803
MB
/
s
!
aes128
(
ECB
decrypt
):
10.74
s
,
0.931
MB
/
s
!
aes128
(
CBC
encrypt
):
17.74
s
,
0.564
MB
/
s
!
aes128
(
CBC
decrypt
):
12.43
s
,
0.805
MB
/
s
!
!
aes192
(
ECB
encrypt
):
14.59
s
,
0.685
MB
/
s
!
aes192
(
ECB
decrypt
):
12.76
s
,
0.784
MB
/
s
!
aes192
(
CBC
encrypt
):
19.97
s
,
0.501
MB
/
s
!
aes192
(
CBC
decrypt
):
14.46
s
,
0.692
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.00
s
,
0.588
MB
/
s
!
aes256
(
ECB
decrypt
):
14.81
s
,
0.675
MB
/
s
!
aes256
(
CBC
encrypt
):
22.65
s
,
0.442
MB
/
s
!
aes256
(
CBC
decrypt
):
16.46
s
,
0.608
MB
/
s
!
After
implementing
double
buffering
!
aes128
(
ECB
encrypt
):
12.59
s
,
0.794
MB
/
s
!
aes128
(
ECB
decrypt
):
10.56
s
,
0.947
MB
/
s
!
aes128
(
CBC
encrypt
):
17.91
s
,
0.558
MB
/
s
!
aes128
(
CBC
decrypt
):
12.30
s
,
0.813
MB
/
s
!
!
aes192
(
ECB
encrypt
):
15.03
s
,
0.665
MB
/
s
!
aes192
(
ECB
decrypt
):
12.56
s
,
0.796
MB
/
s
!
aes192
(
CBC
encrypt
):
20.30
s
,
0.493
MB
/
s
!
aes192
(
CBC
decrypt
):
14.26
s
,
0.701
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.30
s
,
0.578
MB
/
s
!
aes256
(
ECB
decrypt
):
14.51
s
,
0.689
MB
/
s
!
aes256
(
CBC
encrypt
):
22.75
s
,
0.440
MB
/
s
!
aes256
(
CBC
decrypt
):
16.35
s
,
0.612
MB
/
s
!
After
reordering
aes
-
encrypt.c
and
aes
-
decrypt.c
!
(
the
order
probably
causes
strange
cache
-
effects
):
!
aes128
(
ECB
encrypt
):
9.21
s
,
1.086
MB
/
s
!
aes128
(
ECB
decrypt
):
11.13
s
,
0.898
MB
/
s
!
aes128
(
CBC
encrypt
):
14.12
s
,
0.708
MB
/
s
!
aes128
(
CBC
decrypt
):
13.77
s
,
0.726
MB
/
s
!
!
aes192
(
ECB
encrypt
):
10.86
s
,
0.921
MB
/
s
!
aes192
(
ECB
decrypt
):
13.17
s
,
0.759
MB
/
s
!
aes192
(
CBC
encrypt
):
15.74
s
,
0.635
MB
/
s
!
aes192
(
CBC
decrypt
):
15.91
s
,
0.629
MB
/
s
!
!
aes256
(
ECB
encrypt
):
12.71
s
,
0.787
MB
/
s
!
aes256
(
ECB
decrypt
):
15.38
s
,
0.650
MB
/
s
!
aes256
(
CBC
encrypt
):
17.49
s
,
0.572
MB
/
s
!
aes256
(
CBC
decrypt
):
17.87
s
,
0.560
MB
/
s
!
After
further
optimizations
of
the
initial
and
final
loops
,
!
source_loop
and
final_loop.
!
aes128
(
ECB
encrypt
):
8.07
s
,
1.239
MB
/
s
!
aes128
(
ECB
decrypt
):
9.48
s
,
1.055
MB
/
s
!
aes128
(
CBC
encrypt
):
12.76
s
,
0.784
MB
/
s
!
aes128
(
CBC
decrypt
):
12.15
s
,
0.823
MB
/
s
!
!
aes192
(
ECB
encrypt
):
9.43
s
,
1.060
MB
/
s
!
aes192
(
ECB
decrypt
):
11.20
s
,
0.893
MB
/
s
!
aes192
(
CBC
encrypt
):
14.19
s
,
0.705
MB
/
s
!
aes192
(
CBC
decrypt
):
13.97
s
,
0.716
MB
/
s
!
!
aes256
(
ECB
encrypt
):
10.81
s
,
0.925
MB
/
s
!
aes256
(
ECB
decrypt
):
12.92
s
,
0.774
MB
/
s
!
aes256
(
CBC
encrypt
):
15.59
s
,
0.641
MB
/
s
!
aes256
(
CBC
decrypt
):
15.76
s
,
0.635
MB
/
s
!
After
unrolling
loops
,
and
other
optimizations
suggested
by
!
Marcus:
!
aes128
(
ECB
encrypt
):
6.40
s
,
1.562
MB
/
s
!
aes128
(
ECB
decrypt
):
8.17
s
,
1.224
MB
/
s
!
aes128
(
CBC
encrypt
):
13.11
s
,
0.763
MB
/
s
!
aes128
(
CBC
decrypt
):
10.05
s
,
0.995
MB
/
s
!
!
aes192
(
ECB
encrypt
):
7.43
s
,
1.346
MB
/
s
!
aes192
(
ECB
decrypt
):
9.51
s
,
1.052
MB
/
s
!
aes192
(
CBC
encrypt
):
14.09
s
,
0.710
MB
/
s
!
aes192
(
CBC
decrypt
):
11.58
s
,
0.864
MB
/
s
!
!
aes256
(
ECB
encrypt
):
8.57
s
,
1.167
MB
/
s
!
aes256
(
ECB
decrypt
):
11.13
s
,
0.898
MB
/
s
!
aes256
(
CBC
encrypt
):
15.30
s
,
0.654
MB
/
s
!
aes256
(
CBC
decrypt
):
12.93
s
,
0.773
MB
/
s
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment