Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Dmitry Baryshkov
nettle
Commits
4319568d
Commit
4319568d
authored
Oct 23, 2005
by
Niels Möller
Browse files
Tried unrolling only twice.
Rev: src/nettle/sparc/arcfour-crypt.asm:1.7
parent
379fc214
Changes
1
Hide whitespace changes
Inline
Side-by-side
sparc/arcfour-crypt.asm
View file @
4319568d
...
...
@@ -30,107 +30,34 @@ define(<LENGTH>,<%i1>)
define
(
<
DS
T
>
,
<%
i2
>
)
define
(
<
SRC
>
,
<%
i3
>
)
define
(
<
I
>
,
<%
i4
>
)
define
(
<
J
>
,
<%
i5
>
)
define
(
<
SI
>
,
<%
g1
>
)
define
(
<
S
J
>
,
<%
g2
>
)
define
(
<
TMP
>
,
<%
g3
>
)
define
(
<
N
>
,
<%
o0
>
)
define
(
<
WORD
>
,
<%
o1
>
)
C
Encrypts
n
byte
s
,
one
byte
at
a
time.
C
ARCFOUR_BYTE_LOOP
(
n
,
label
)
define
(
<
ARCFOUR_BYTE_LOOP
>
,
<
$2:
add
I
,
1
,
I
and
I
,
0xff
,
I
ldub
[
CTX
+
I
],
SI
subcc
$
1
,
1
,
$
1
ldub
[
SRC
],
TMP
define
(
<
I
1
>
,
<%
i4
>
)
define
(
<
I2
>
,
<%
i5
>
)
define
(
<
J
>
,
<%
g1
>
)
define
(
<
S
I
>
,
<%
g2
>
)
define
(
<
SJ
>
,
<%
g3
>
)
define
(
<
TMP
>
,
<%
o0
>
)
define
(
<
N
>
,
<%
o1
>
)
define
(
<
DATA
>
,
<%
o2
>
)
C
Computes
the
next
byte
of
the
key
stream.
As
input
,
i
must
C
al
ready
point
to
the
index
for
the
current
access
,
the
index
C
for
the
next
access
is
stored
in
ni.
The
resulting
key
byte
is
C
stored
in
res.
C
ARCFOUR_BYTE
(
i
,
ni
,
res
)
define
(
<
ARCFOUR_BYTE
>
,
<
ldub
[
CTX
+
$
1
],
SI
add
$
1
,
1
,
$
2
add
J
,
SI
,
J
and
J
,
0xff
,
J
ldub
[
CTX
+
J
],
SJ
a
d
d
SRC
,
1
,
SRC
a
n
d
$
2
,
0xff
,
$
2
stb
SI
,
[
CTX
+
J
]
add
SI
,
SJ
,
SI
and
SI
,
0xff
,
SI
stb
SJ
,
[
CTX
+
I
]
ldub
[
CTX
+
SI
],
SI
xor
TMP
,
SI
,
TMP
stb
TMP
,
[
DS
T
]
bne
$
2
add
DS
T
,
1
,
DS
T
stb
SJ
,
[
CTX
+
$
1
]
ldub
[
CTX
+
SI
],
$
3
>)
dnl
C
Encrypts
4
n
byte
s
,
four
at
a
time.
Requires
proper
al
ignmentof
C
SRC
and
DS
T.
C
ARCFOUR_WORD_LOOP
(
n
,
label
)
define
(
<
ARCFOUR_WORD_LOOP
>
,
<
$2:
add
I
,
1
,
I
and
I
,
0xff
,
I
ldub
[
CTX
+
I
],
SI
ld
[
SRC
],
WORD
add
J
,
SI
,
J
and
J
,
0xff
,
J
ldub
[
CTX
+
J
],
SJ
stb
SI
,
[
CTX
+
J
]
add
SI
,
SJ
,
SI
and
SI
,
0xff
,
SI
stb
SJ
,
[
CTX
+
I
]
ldub
[
CTX
+
SI
],
TMP
add
I
,
1
,
I
and
I
,
0xff
,
I
ldub
[
CTX
+
I
],
SI
add
SRC
,
4
,
SRC
add
J
,
SI
,
J
and
J
,
0xff
,
J
ldub
[
CTX
+
J
],
SJ
stb
SI
,
[
CTX
+
J
]
add
SI
,
SJ
,
SI
and
SI
,
0xff
,
SI
stb
SJ
,
[
CTX
+
I
]
ldub
[
CTX
+
SI
],
SI
sll
TMP
,
8
,
TMP
or
TMP
,
SI
,
TMP
add
I
,
1
,
I
and
I
,
0xff
,
I
ldub
[
CTX
+
I
],
SI
subcc
$
1
,
1
,
$
1
add
J
,
SI
,
J
and
J
,
0xff
,
J
ldub
[
CTX
+
J
],
SJ
stb
SI
,
[
CTX
+
J
]
add
SI
,
SJ
,
SI
and
SI
,
0xff
,
SI
stb
SJ
,
[
CTX
+
I
]
ldub
[
CTX
+
SI
],
SI
sll
TMP
,
8
,
TMP
or
TMP
,
SI
,
TMP
add
I
,
1
,
I
and
I
,
0xff
,
I
ldub
[
CTX
+
I
],
SI
C
empty
slot
add
J
,
SI
,
J
and
J
,
0xff
,
J
ldub
[
CTX
+
J
],
SJ
stb
SI
,
[
CTX
+
J
]
add
SI
,
SJ
,
SI
and
SI
,
0xff
,
SI
stb
SJ
,
[
CTX
+
I
]
ldub
[
CTX
+
SI
],
SI
sll
TMP
,
8
,
TMP
or
TMP
,
SI
,
TMP
xor
WORD
,
TMP
,
WORD
st
WORD
,
[
DS
T
]
bne
$
2
add
DS
T
,
4
,
DS
T
>)
dnl
C
FIXME
:
Consider
using
the
callers
window
define
(
<
FRAME_SIZE
>
,
104
)
...
...
@@ -151,52 +78,48 @@ PROLOGUE(nettle_arcfour_crypt)
be
.Lend
C
Load
both
I
and
J
lduh
[
CTX
+
ARCFOUR_I
],
I
and
I
,
0xff
,
J
srl
I
,
8
,
I
lduh
[
CTX
+
ARCFOUR_I
],
I
1
and
I
1
,
0xff
,
J
srl
I
1
,
8
,
I
1
ifelse
(
WITH_ALIGN
,
YES
,
<
C
Ch
eck
if
SRC
and
DS
T
have
compatible
al
ignment
xor
SRC
,
DS
T
,
TMP
andcc
TMP
,
3
,
TMP
andcc
LENGTH
,
1
,
%
g0
beq
.Loop
bne
.Lrest
nop
andcc
DS
T
,
3
,
N
bz
.Laligned
nop
sub
N
,
4
,
N
neg
N
cmp
N
,
LENGTH
bgeu
.Lrest
nop
sub
LENGTH
,
N
,
LENGTH
ARCFOUR_BYTE_LOOP
(
N
,
.Lunalignedloop
)
add
I1
,
1
,
I1
and
I1
,
0xff
,
I1
.Laligned:
srl
LENGTH
,
2
,
N
cmp
N
,
0
be
.Lrest
nop
ARCFOUR_WORD_LOOP
(
N
,
.Lalignedloop
)
ARCFOUR_BYTE
(
I1
,
I2
,
TMP
)
ldub
[
SRC
],
DATA
subcc
LENGTH
,
1
,
LENGTH
add
SRC
,
1
,
SRC
xor
DATA
,
TMP
,
DATA
stb
DATA
,
[
DS
T
]
beq
.Ldone
add
DS
T
,
1
,
DS
T
andcc
LENGTH
,
3
,
LENGTH
bz
.Ldone
nop
>)
.Lrest:
ARCFOUR_BYTE_LOOP
(
LENGTH
,
.Loop
)
mov
I2
,
I1
.Loop:
ARCFOUR_BYTE
(
I1
,
I2
,
TMP
)
ldub
[
SRC
],
DATA
add
SRC
,
2
,
SRC
xor
DATA
,
TMP
,
DATA
stb
DATA
,
[
DS
T
]
ARCFOUR_BYTE
(
I2
,
I1
,
TMP
)
ldub
[
SRC
-
1
],
DATA
subcc
LENGTH
,
2
,
LENGTH
add
DS
T
,
2
,
DS
T
xor
DATA
,
TMP
,
DATA
bne
.Loop
stb
DATA
,
[
DS
T
-
1
]
mov
I2
,
I1
.Ldone:
C
Save
back
I
and
J
sll
I
,
8
,
I
or
I
,
J
,
I
stuh
I
,
[
CTX
+
ARCFOUR_I
]
C
Save
back
I
and
J
sll
I
1
,
8
,
I
1
or
I
1
,
J
,
I
1
stuh
I
1
,
[
CTX
+
ARCFOUR_I
]
.Lend:
ret
...
...
@@ -212,6 +135,7 @@ C 3: Moved load of source byte
C
4
:
Better
instruction
scheduling
C
5
:
Sp
ecial
case
SRC
and
DS
T
with
compatible
al
ignment
C
6
:
After
bugfix
(
reorder
of
ld
[
CTX
+
SI
+
SJ
]
and
st
[
CTX
+
SI
])
C
7
:
Unrolled
only
twice
,
with
byte
-
accesses
C
MB
/
s
cycles
/
byte
Code
si
ze
(
byte
s
)
C
1
:
6.6
12.4
132
...
...
@@ -220,3 +144,4 @@ C 3: 6.0 13.5 116
C
4
:
6.5
12.4
116
C
5
:
7.9
10.4
496
C
6
:
8.3
9.7
496
C
7
:
6.7
12.1
268
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment