Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
N
nettle
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Norbert Pócs
nettle
Commits
91e59caa
Commit
91e59caa
authored
19 years ago
by
Niels Möller
Browse files
Options
Downloads
Patches
Plain Diff
Deleted unused file.
Rev: src/nettle/sparc64/aes.asm:1.2(DEAD)
parent
79eb7d8d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sparc64/aes.asm
+0
-369
0 additions, 369 deletions
sparc64/aes.asm
with
0 additions
and
369 deletions
sparc64/aes.asm
deleted
100644 → 0
+
0
−
369
View file @
79eb7d8d
!
-*-
mode:
asm
; asm-comment-char: ?!; -*-
!
nettle
,
low
-
level
cryptographics
library
!
!
Copyright
(
C
)
2002
Niels
Möller
!
!
The
nettle
library
is
free
software
; you can redistribute it and/or modify
!
it
under
the
terms
of
the
GNU
Lesser
General
Public
License
as
published
by
!
the
Free
Software
Foundation
; either version 2.1 of the License, or (at your
!
option
)
any
later
version.
!
!
The
nettle
library
is
di
stributed
in
the
hope
that
it
will
be
useful
,
but
!
WITHOUT
ANY
WARRANTY
; without even the implied warranty of MERCHANTABILITY
!
or
FITNESS
FOR
A
PARTICULAR
PURPOSE.
See
the
GNU
Lesser
General
Public
!
License
for
more
details.
!
!
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
License
!
along
with
the
nettle
library
; see the file COPYING.LIB. If not, write to
!
the
Free
Software
Foundation
,
Inc.
,
59
Temple
Place
-
Suite
330
,
Boston
,
!
MA
02111
-
1307
,
USA.
!
The
only
di
fference
between
this
code
and
the
sp
arc32
code
is
the
!
frame
offsets
,
and
the
magic
BIAS
when
accessing
the
stack.
!
FIXME:
For
improved
ultra
sp
arc
performance
,
we
should
avoid
AL
U
!
instructions
that
use
the
result
of
an
immediately
preceding
AL
U
!
instruction.
It
is
al
so
a
good
idea
to
have
a
greater
di
stance
than
!
one
instruction
between
a
load
and
use
of
its
value
,
as
that
reduces
!
the
penalty
for
cache
misses.
Such
instruction
sequences
are
marked
!
with
!
U
comments.
!
NOTE:
Some
of
the
%
g
registers
are
reserved
for
operating
system
etc
!
(
see
gcc
/
config
/
sp
arc.h
)
.
The
only
%
g
registers
that
seem
safe
to
!
use
are
%
g1
-%
g3.
!
Used
registers
:
%
l0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
!
%
i0
,
1
,
2
,
3
,
4
(
%
i6
=
%
fp
,
%
i7
=
return
)
!
%
o0
,
1
,
2
,
3
,
4
(
%
o6
=
%
sp
)
!
C Symbolic register names used by the macros and by _nettle_aes_crypt
C below. Every name is a plain m4 define, so any later occurrence of
C the name, including inside ! comments, is replaced by the register.
	.file "aes.asm"

! Arguments
define(ctx, %i0)
define(T, %i1)
define(length, %i2)
define(dst, %i3)
define(src, %i4)

! Loop invariants
C wtxt and tmp hold the addresses of the two 16-byte work buffers in
C the stack frame, and diff holds wtxt XOR tmp, which lets the round
C loop exchange the two pointers with two xor instructions.
define(wtxt, %l0)
define(tmp, %l1)
define(diff, %l2)
define(nrounds, %l3)

! Further loop invariants
C Addresses of the four word tables, computed once as T + AES_TABLEn.
define(T0, %l4)
define(T1, %l5)
define(T2, %l6)
define(T3, %l7)

! Temporaries
C Scratch registers, overwritten by every macro expansion.
define(t0, %o0)
define(t1, %o1)
define(t2, %o2)

! Loop variables
C round counts the remaining iterations of the round loop, and key
C points at the subkey words used by the current round.
define(round, %o3)
define(key, %o4)

C IDX1 contains the permutation values * 4 + 2
C The entries are read with ld and then used directly as byte offsets
C from wtxt.
C NOTE(review): AES_SIDX1 and AES_SIDX3 are defined outside this file;
C the description of the table contents is taken from the original
C comments and should be confirmed against that definition.
define(IDX1, <T + AES_SIDX1 >)

C IDX3 contains the permutation values * 4
define(IDX3, <T + AES_SIDX3 >)
C AES_LOAD(i)
C Get one word of input, XOR with first subkey, store in wtxt
C
C   i is a byte offset; the block loop expands this macro with
C   0, 4, 8 and 12.
C
C   The four input bytes at src + i .. src + i + 3 are fetched with
C   byte loads and combined as
C       b0 | b1 << 8 | b2 << 16 | b3 << 24
C   The word at ctx + i is then XORed in and the result is written
C   with a word store to wtxt + i.
C
C   Clobbers t0, t1 and t2. The instruction order is hand scheduled;
C   the U marks flag an instruction that uses the result of the
C   instruction just before it, as described in the FIXME note in the
C   file header.
define(<AES_LOAD>, <
	ldub	[src+$1], t0
	ldub	[src+$1+1], t1
	ldub	[src+$1+2], t2
	sll	t1, 8, t1

	or	t0, t1, t0	! U
	ldub	[src+$1+3], t1
	sll	t2, 16, t2
	or	t0, t2, t0

	sll	t1, 24, t1
	! Get subkey
	ld	[ctx + $1], t2
	or	t0, t1, t0
	xor	t0, t2, t0

	st	t0, [wtxt+$1]>)dnl
C AES_ROUND(i)
C Compute one word in the round function.
C Input in wtxt, output stored in tmp + i.
C
C The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ]
C the instruction is a part of.
C
C   i is the byte offset of the output word; the round loop expands
C   this macro with 0, 4, 8 and 12.
C
C   Four bytes are fetched from the wtxt buffer:
C     j = 0: the byte at wtxt + i + 3
C     j = 1: the byte at wtxt + offset, offset loaded from IDX1 + i
C     j = 2: the byte at wtxt + (i XOR 8) + 1
C     j = 3: the byte at wtxt + offset, offset loaded from IDX3 + i
C   Each byte is multiplied by 4 and used as an offset into the word
C   table T0, T1, T2 or T3 respectively. The four table words and the
C   word at key + i are XORed together and stored at tmp + i.
C
C   Because the output goes to tmp while every input comes from wtxt,
C   the two buffers must not share any bytes.
C
C   Clobbers t0, t1 and t2. The instruction order is hand scheduled
C   and should be kept as is; U marks a use of the result of the
C   instruction just before it.
define(<AES_ROUND>, <
	ld	[IDX1+$1], t1		! 1
	ldub	[wtxt+$1+3], t0		! 0
	ldub	[wtxt+t1], t1		! 1
	sll	t0, 2, t0		! 0

	ld	[T0+t0], t0		! 0
	sll	t1, 2, t1		! 1
	ld	[T1+t1], t1		! 1 ! U
	ld	[IDX3+$1], t2		! 3

	xor	t0, t1, t0		! 0, 1
	! IDX2(j) = j XOR 2
	ldub	[wtxt+eval($1 ^ 8)+1], t1	! 2
	ldub	[wtxt+t2], t2		! 3
	sll	t1, 2, t1		! 2

	ld	[T2+t1], t1		! 2 ! U
	sll	t2, 2, t2		! 3
	ld	[T3+t2], t2		! 3 ! U
	xor	t0, t1, t0		! 0, 1, 2

	! Fetch roundkey
	ld	[key+$1], t1
	xor	t0, t2, t0		! 0, 1, 2, 3
	xor	t0, t1, t0		! U
	st	t0, [tmp+$1]>)dnl
C AES_FINAL_ROUND(i)
C Compute one word in the final round function.
C Input in wtxt, output converted to an octet string and stored at dst.
C
C The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ]
C the instruction is a part of.
C
C   i is the byte offset of the output word; the block loop expands
C   this macro with 0, 4, 8 and 12.
C
C   The same four bytes of wtxt are selected as in AES_ROUND, but each
C   one is replaced by a single byte loaded from T + byte instead of a
C   word from T0..T3.
C   NOTE(review): this assumes a 256 entry byte table sits at offset 0
C   of the structure T points to; confirm against its definition.
C
C   The four substituted bytes are combined as
C       s0 | s1 << 8 | s2 << 16 | s3 << 24
C   and XORed with the word at key + i. The result is written one byte
C   at a time, lowest byte at dst + i and highest byte at dst + i + 3,
C   so dst needs no particular alignment.
C
C   Clobbers t0, t1 and t2. The instruction order is hand scheduled
C   and should be kept as is; U marks a use of the result of the
C   instruction just before it.
define(<AES_FINAL_ROUND>, <
	ld	[IDX1+$1], t1		! 1
	ldub	[wtxt+$1+3], t0		! 0
	ldub	[wtxt+t1], t1		! 1
	ldub	[T+t0], t0		! 0

	ldub	[T+t1], t1		! 1
	ld	[IDX3+$1], t2		! 3
	sll	t1, 8, t1		! 1
	or	t0, t1, t0		! 0, 1 ! U

	! IDX2(j) = j XOR 2
	ldub	[wtxt+eval($1 ^ 8)+1], t1	! 2
	ldub	[wtxt+t2], t2		! 3
	ldub	[T+t1], t1		! 2
	ldub	[T+t2], t2		! 3

	sll	t1, 16, t1		! 2
	or	t0, t1, t0		! 0, 1, 2 ! U
	sll	t2, 24, t2		! 3
	ld	[key+$1], t1

	or	t0, t2, t0		! 0, 1, 2, 3
	xor	t0, t1, t0		! U
	srl	t0, 24, t1		! U
	stb	t1, [dst+$1+3]		! U

	srl	t0, 16, t1
	stb	t1, [dst+$1+2]		! U
	srl	t0, 8, t1
	stb	t1, [dst+$1+1]		! U

	stb	t0, [dst+$1]>)dnl
C The stack frame looks like
C
C %fp -   8: OS-dependent link field
C %fp -  16: OS-dependent link field
C %fp -  32: tmp, uint32_t[4]
C %fp -  48: wtxt, uint32_t[4]
C %fp - 224: OS register save area. (22*8 == 176 bytes)
C
C The offsets above do not include the stack bias; the code adds BIAS
C whenever it forms an address from %fp. The register save area ends
C at %fp - 224 + 176 == %fp - 48, so wtxt starts right after it.
define(<FRAME_SIZE>, 224)
define(<BIAS>, 2047) C Magic stack bias for the Sparc64 ABI

C _nettle_aes_crypt
C
C Register arguments, using the names defined earlier in this file:
C   ctx     %i0  subkey words start at offset 0, and the round count
C                is the word at offset AES_NROUNDS
C   T       %i1  base of the lookup tables, addressed through the
C                AES_TABLE0..3 and AES_SIDX1/3 offsets
C   length  %i2  number of bytes to process
C   dst     %i3  output bytes
C   src     %i4  input bytes
C NOTE(review): the matching prototype is not part of this file;
C confirm the argument types against the caller.
C
C Returns at once when length is zero. Otherwise 16 bytes are handled
C per iteration and the loop ends only when length reaches exactly
C zero, so length is expected to be a multiple of 16.
C
C For each block: the input is loaded and XORed with the first four
C subkey words, nrounds - 1 table rounds follow, and one final round
C writes the 16 output bytes.
	.section	".text"
	.align 16
	.global _nettle_aes_crypt
	.type	_nettle_aes_crypt,#function
	.proc	020

_nettle_aes_crypt:

	save	%sp, -FRAME_SIZE, %sp
	cmp	length, 0
	be	.Lend
	C The next instruction sits in the delay slot of the branch and
	C is executed whether or not the branch is taken.
	C
	C wtxt and tmp must be two disjoint 16-byte buffers, because each
	C round reads all of wtxt while it stores into tmp. They are
	C placed as in the frame description above. The previous offsets,
	C BIAS-32 and BIAS-40, made the buffers share 8 bytes, so the
	C stores for output words 2 and 3 overwrote input words 0 and 1
	C of the same round.
	add	%fp, BIAS-48, wtxt

	add	%fp, BIAS-32, tmp
	ld	[ctx + AES_NROUNDS], nrounds
	! Compute xor, so that we can swap efficiently.
	xor	wtxt, tmp, diff
	! The loop variable will be multiplied by 16.
	! More loop invariants
	add	T, AES_TABLE0, T0

	add	T, AES_TABLE1, T1
	add	T, AES_TABLE2, T2
	add	T, AES_TABLE3, T3
	nop

.Lblock_loop:
	C  Read src, and add initial subkey
	AES_LOAD(0)	! i = 0
	AES_LOAD(4)	! i = 1
	AES_LOAD(8)	! i = 2
	AES_LOAD(12)	! i = 3

	add	src, 16, src
	C The round loop below runs nrounds - 1 times; the last round is
	C done separately by the final round macros.
	sub	nrounds, 1, round
	C The first four subkey words were consumed by the loads above,
	C so the rounds start at the second group of four.
	add	ctx, 16, key
	nop

.Lround_loop:
	AES_ROUND(0)	! i = 0
	AES_ROUND(4)	! i = 1
	AES_ROUND(8)	! i = 2
	AES_ROUND(12)	! i = 3

	! switch roles for tmp and wtxt
	xor	wtxt, diff, wtxt
	xor	tmp, diff, tmp
	subcc	round, 1, round
	bne	.Lround_loop
	C Delay slot: the subkey pointer advances on every pass,
	C including the last one, so it ends up at the subkey for the
	C final round.
	add	key, 16, key

	C Final round, and storage of the output
	AES_FINAL_ROUND(0)	! i = 0
	AES_FINAL_ROUND(4)	! i = 1
	AES_FINAL_ROUND(8)	! i = 2
	AES_FINAL_ROUND(12)	! i = 3

	addcc	length, -16, length
	bne	.Lblock_loop
	C Delay slot: advance the output pointer past the block just
	C written.
	add	dst, 16, dst

.Lend:
	ret
	restore
.Leord:
	.size	_nettle_aes_crypt,.Leord-_nettle_aes_crypt
!
Benchmarks
on
my
slow
sp
arcstation
:
!
Original
C
code
!
aes128
(
ECB
encrypt
):
14.36
s
,
0.696
MB
/
s
!
aes128
(
ECB
decrypt
):
17.19
s
,
0.582
MB
/
s
!
aes128
(
CBC
encrypt
):
16.08
s
,
0.622
MB
/
s
!
aes128
((
CBC
decrypt
)):
18.79
s
,
0.532
MB
/
s
!
!
aes192
(
ECB
encrypt
):
16.85
s
,
0.593
MB
/
s
!
aes192
(
ECB
decrypt
):
19.64
s
,
0.509
MB
/
s
!
aes192
(
CBC
encrypt
):
18.43
s
,
0.543
MB
/
s
!
aes192
(
CBC
decrypt
):
20.76
s
,
0.482
MB
/
s
!
!
aes256
(
ECB
encrypt
):
19.12
s
,
0.523
MB
/
s
!
aes256
(
ECB
decrypt
):
22.57
s
,
0.443
MB
/
s
!
aes256
(
CBC
encrypt
):
20.92
s
,
0.478
MB
/
s
!
aes256
(
CBC
decrypt
):
23.22
s
,
0.431
MB
/
s
!
After
unrolling
key_addition32
,
and
getting
rid
of
!
some
sll
x
,
2
,
x
,
encryption
sp
eed
is
0.760
MB
/
s.
!
Next
,
the
C
code
was
optimized
to
use
larger
tables
and
!
no
rotates.
New
timings
:
!
aes128
(
ECB
encrypt
):
13.10
s
,
0.763
MB
/
s
!
aes128
(
ECB
decrypt
):
11.51
s
,
0.869
MB
/
s
!
aes128
(
CBC
encrypt
):
15.15
s
,
0.660
MB
/
s
!
aes128
(
CBC
decrypt
):
13.10
s
,
0.763
MB
/
s
!
!
aes192
(
ECB
encrypt
):
15.68
s
,
0.638
MB
/
s
!
aes192
(
ECB
decrypt
):
13.59
s
,
0.736
MB
/
s
!
aes192
(
CBC
encrypt
):
17.65
s
,
0.567
MB
/
s
!
aes192
(
CBC
decrypt
):
15.31
s
,
0.653
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.95
s
,
0.557
MB
/
s
!
aes256
(
ECB
decrypt
):
15.90
s
,
0.629
MB
/
s
!
aes256
(
CBC
encrypt
):
20.16
s
,
0.496
MB
/
s
!
aes256
(
CBC
decrypt
):
17.47
s
,
0.572
MB
/
s
!
After
optimization
using
pre
-
shifted
indices
!
(
AES_SIDX
[
1
-
3
]):
!
aes128
(
ECB
encrypt
):
12.46
s
,
0.803
MB
/
s
!
aes128
(
ECB
decrypt
):
10.74
s
,
0.931
MB
/
s
!
aes128
(
CBC
encrypt
):
17.74
s
,
0.564
MB
/
s
!
aes128
(
CBC
decrypt
):
12.43
s
,
0.805
MB
/
s
!
!
aes192
(
ECB
encrypt
):
14.59
s
,
0.685
MB
/
s
!
aes192
(
ECB
decrypt
):
12.76
s
,
0.784
MB
/
s
!
aes192
(
CBC
encrypt
):
19.97
s
,
0.501
MB
/
s
!
aes192
(
CBC
decrypt
):
14.46
s
,
0.692
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.00
s
,
0.588
MB
/
s
!
aes256
(
ECB
decrypt
):
14.81
s
,
0.675
MB
/
s
!
aes256
(
CBC
encrypt
):
22.65
s
,
0.442
MB
/
s
!
aes256
(
CBC
decrypt
):
16.46
s
,
0.608
MB
/
s
!
After
implementing
double
buffering
!
aes128
(
ECB
encrypt
):
12.59
s
,
0.794
MB
/
s
!
aes128
(
ECB
decrypt
):
10.56
s
,
0.947
MB
/
s
!
aes128
(
CBC
encrypt
):
17.91
s
,
0.558
MB
/
s
!
aes128
(
CBC
decrypt
):
12.30
s
,
0.813
MB
/
s
!
!
aes192
(
ECB
encrypt
):
15.03
s
,
0.665
MB
/
s
!
aes192
(
ECB
decrypt
):
12.56
s
,
0.796
MB
/
s
!
aes192
(
CBC
encrypt
):
20.30
s
,
0.493
MB
/
s
!
aes192
(
CBC
decrypt
):
14.26
s
,
0.701
MB
/
s
!
!
aes256
(
ECB
encrypt
):
17.30
s
,
0.578
MB
/
s
!
aes256
(
ECB
decrypt
):
14.51
s
,
0.689
MB
/
s
!
aes256
(
CBC
encrypt
):
22.75
s
,
0.440
MB
/
s
!
aes256
(
CBC
decrypt
):
16.35
s
,
0.612
MB
/
s
!
After
reordering
aes
-
encrypt.c
and
aes
-
decrypt.c
!
(
the
order
probably
causes
strange
cache
-
effects
):
!
aes128
(
ECB
encrypt
):
9.21
s
,
1.086
MB
/
s
!
aes128
(
ECB
decrypt
):
11.13
s
,
0.898
MB
/
s
!
aes128
(
CBC
encrypt
):
14.12
s
,
0.708
MB
/
s
!
aes128
(
CBC
decrypt
):
13.77
s
,
0.726
MB
/
s
!
!
aes192
(
ECB
encrypt
):
10.86
s
,
0.921
MB
/
s
!
aes192
(
ECB
decrypt
):
13.17
s
,
0.759
MB
/
s
!
aes192
(
CBC
encrypt
):
15.74
s
,
0.635
MB
/
s
!
aes192
(
CBC
decrypt
):
15.91
s
,
0.629
MB
/
s
!
!
aes256
(
ECB
encrypt
):
12.71
s
,
0.787
MB
/
s
!
aes256
(
ECB
decrypt
):
15.38
s
,
0.650
MB
/
s
!
aes256
(
CBC
encrypt
):
17.49
s
,
0.572
MB
/
s
!
aes256
(
CBC
decrypt
):
17.87
s
,
0.560
MB
/
s
!
After
further
optimizations
of
the
initial
and
final
loops
,
!
source_loop
and
final_loop.
!
aes128
(
ECB
encrypt
):
8.07
s
,
1.239
MB
/
s
!
aes128
(
ECB
decrypt
):
9.48
s
,
1.055
MB
/
s
!
aes128
(
CBC
encrypt
):
12.76
s
,
0.784
MB
/
s
!
aes128
(
CBC
decrypt
):
12.15
s
,
0.823
MB
/
s
!
!
aes192
(
ECB
encrypt
):
9.43
s
,
1.060
MB
/
s
!
aes192
(
ECB
decrypt
):
11.20
s
,
0.893
MB
/
s
!
aes192
(
CBC
encrypt
):
14.19
s
,
0.705
MB
/
s
!
aes192
(
CBC
decrypt
):
13.97
s
,
0.716
MB
/
s
!
!
aes256
(
ECB
encrypt
):
10.81
s
,
0.925
MB
/
s
!
aes256
(
ECB
decrypt
):
12.92
s
,
0.774
MB
/
s
!
aes256
(
CBC
encrypt
):
15.59
s
,
0.641
MB
/
s
!
aes256
(
CBC
decrypt
):
15.76
s
,
0.635
MB
/
s
!
After
unrolling
loops
,
and
other
optimizations
suggested
by
!
Marcus:
!
aes128
(
ECB
encrypt
):
6.40
s
,
1.562
MB
/
s
!
aes128
(
ECB
decrypt
):
8.17
s
,
1.224
MB
/
s
!
aes128
(
CBC
encrypt
):
13.11
s
,
0.763
MB
/
s
!
aes128
(
CBC
decrypt
):
10.05
s
,
0.995
MB
/
s
!
!
aes192
(
ECB
encrypt
):
7.43
s
,
1.346
MB
/
s
!
aes192
(
ECB
decrypt
):
9.51
s
,
1.052
MB
/
s
!
aes192
(
CBC
encrypt
):
14.09
s
,
0.710
MB
/
s
!
aes192
(
CBC
decrypt
):
11.58
s
,
0.864
MB
/
s
!
!
aes256
(
ECB
encrypt
):
8.57
s
,
1.167
MB
/
s
!
aes256
(
ECB
decrypt
):
11.13
s
,
0.898
MB
/
s
!
aes256
(
CBC
encrypt
):
15.30
s
,
0.654
MB
/
s
!
aes256
(
CBC
decrypt
):
12.93
s
,
0.773
MB
/
s
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment