Nettle
nettle
Commits
8c02a9dc
Commit
8c02a9dc
authored
Mar 06, 2013
by
Niels Möller
x86_64 assembly for ecc_384_modp.
parent
12fe3cc1
ChangeLog
View file @
8c02a9dc
2013-03-06  Niels Möller  <nisse@lysator.liu.se>
* x86_64/ecc384modp.asm: New file, 3 time speedup.
* x86_64/ecc256redc.asm: New file, 2.5 time speedup.
* x86_64/ecc224modp.asm: New file, 5 time speedup over C
version.
...
...
x86_64/ecc384modp.asm
0 → 100644
View file @
8c02a9dc
C nettle, low-level cryptographics library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
C Register aliases used by nettle_ecc_384_modp.  Syntax is AT&T, run
C through m4; the definitions below assume < and > are the active m4
C quote characters (set up outside this file -- TODO confirm).
	.file "ecc384modp.asm"

C RP is the base register for every memory operand in the function:
C the limbs are read from and written back through it.
define(<RP>, <%rsi>)

C D4 is first loaded from 80(RP) and later serves as the top limb of
C the shifted difference that is added into T5.
define(<D4>, <%rax>)

C T0..T5 accumulate the six limbs that are finally stored to
C 0(RP) .. 40(RP).  T0 (%rbx) and T3 (%rbp) are pushed/popped by the
C function.
define(<T0>, <%rbx>)
define(<T1>, <%rcx>)
define(<T2>, <%rdx>)
define(<T3>, <%rbp>)
define(<T4>, <%rdi>)
define(<T5>, <%r8>)

C H0..H5 hold values derived from the limbs at 48(RP) .. 88(RP) and
C are reused as scratch in the later carry-folding steps.  H3..H5
C (%r12..%r14) are pushed/popped by the function.
define(<H0>, <%r9>)
define(<H1>, <%r10>)
define(<H2>, <%r11>)
define(<H3>, <%r12>)
define(<H4>, <%r13>)
define(<H5>, <%r14>)

C C2 collects carries from the first folding step; it is only added
C back in near the end.  %r15 is pushed/popped by the function.
define(<C2>, <%r15>)

C The three aliases below share a register with an earlier name.  The
C right-hand sides are unquoted, so they rely on H5, RP and H4 already
C being defined above.
C   C0  == H5 (%r14): usable once H5 has been read for the last time.
C   D0  == RP (%rsi): the function saves RP on the stack around its use.
C   TMP == H4 (%r13): usable once H4 has been read for the last time.
define(<C0>, H5)	C Overlap
define(<D0>, RP)	C Overlap
define(<TMP>, H4)	C Overlap
C nettle_ecc_384_modp
C
C Folds the twelve 64-bit limbs found at RP (byte offsets 0 .. 88) down
C to six limbs, which are stored back to offsets 0 .. 40.  Offsets
C 48 .. 88 are read but never written.
C NOTE(review): judging by the name and by the 2^32 / 2^96 / B^2 weights
C used below, this is presumably reduction modulo the 384-bit curve
C prime 2^384 - 2^128 - 2^96 + 2^32 - 1 -- confirm against the C version.
C
C Registers: %rbx, %rbp and %r12 .. %r15 are saved on entry and restored
C on exit.  RP is additionally saved with push/pop in the middle of the
C function, because the D0 alias reuses its register.  The remaining
C aliased registers (D4, T1, T2, T4, T5, H0 .. H2) are overwritten
C without being saved here.
C W64_ENTRY / W64_EXIT are defined elsewhere; presumably they adapt the
C Windows x64 calling convention for two integer arguments -- TODO
C confirm.  In the code below the register of T4 (%rdi) is only ever
C used as scratch.
PROLOGUE(nettle_ecc_384_modp)
	W64_ENTRY(2, 0)

	C Save the registers this function restores before returning.
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	C First get top 2 limbs, which need folding twice
	C
	C   H5 H4
	C     -H5
	C  ------
	C   H0 D4
	C
	C Then shift right, (H1,H0,D4)  <--  (H0,D4) << 32
	C and add
	C
	C     H5 H4
	C     H1 H0
	C  ----------
	C  C2 H1 H0

	mov	80(RP), D4
	mov	88(RP), H0
	mov	D4, H4			C Keep unmodified copies of the two
	mov	H0, H5			C top limbs in H4, H5
	sub	H0, D4			C (H0,D4) <-- (H5,H4) - H5
	sbb	$0, H0

	C (H1,H0) <-- (H0,D4) >> 32.  D4 itself is left untouched here.
	mov	D4, T2
	mov	H0, H1
	shl	$32, H0
	shr	$32, T2
	shr	$32, H1
	or	T2, H0

	xor	C2, C2			C C2 = 0, also clears the carry flag
	add	H4, H0
	adc	H5, H1
	adc	$0, C2

	C Add in to high part
	add	48(RP), H0
	adc	56(RP), H1
	adc	$0, C2		C Do C2 later

	C +1 term
	C (T5,...,T0) <-- limbs 0..5 + (H5,H4,H3,H2,H1,H0), where H2 and H3
	C are loaded from 64(RP) and 72(RP).  The loads are interleaved
	C with the adc chain; mov does not change the carry flag.
	mov	(RP), T0
	add	H0, T0
	mov	8(RP), T1
	adc	H1, T1
	mov	16(RP), T2
	mov	64(RP), H2
	adc	H2, T2
	mov	24(RP), T3
	mov	72(RP), H3
	adc	H3, T3
	mov	32(RP), T4
	adc	H4, T4
	mov	40(RP), T5
	adc	H5, T5			C Last read of H5; C0 takes over its register
	sbb	C0, C0			C C0 = -carry (0 or -1)
	neg	C0		C FIXME: Switch sign of C0?

	C D0 shares its register with RP, so keep the pointer on the stack
	C until the final stores.
	push	RP

	C +B^2 term
	C Add (H3,H2,H1,H0) once more, two limbs higher up.
	add	H0, T2
	adc	H1, T3
	adc	H2, T4
	adc	H3, T5
	adc	$0, C0

	C   H3 H2 H1 H0  0
	C - H4 H3 H2 H1 H0
	C  ---------------
	C   H3 H2 H1 H0 D0

	C XREG is defined elsewhere; presumably it names the 32-bit form of
	C the register, so that this move clears the upper half of D4 --
	C TODO confirm.
	mov	XREG(D4), XREG(D4)
	mov	H0, D0
	neg	D0			C D0 = 0 - H0, borrow set iff H0 != 0
	sbb	H1, H0
	sbb	H2, H1
	sbb	H3, H2
	sbb	H4, H3			C Last read of H4; TMP takes over its register
	sbb	$0, D4

	C Shift right. High bits are sign, to be added to C0.
	C NOTE(review): in each step below the upper half of a limb is moved
	C into the lower half of the next higher limb, starting at the top
	C (D4) and ending at D0.  The bits leaving D4 are sign extended
	C (sar) and accumulated into C0.
	mov	D4, TMP
	sar	$32, TMP
	shl	$32, D4
	add	TMP, C0

	mov	H3, TMP
	shr	$32, TMP
	shl	$32, H3
	or	TMP, D4

	mov	H2, TMP
	shr	$32, TMP
	shl	$32, H2
	or	TMP, H3

	mov	H1, TMP
	shr	$32, TMP
	shl	$32, H1
	or	TMP, H2

	mov	H0, TMP
	shr	$32, TMP
	shl	$32, H0
	or	TMP, H1

	mov	D0, TMP
	shr	$32, TMP
	shl	$32, D0
	or	TMP, H0

	C (T5,...,T0) += (D4,H3,H2,H1,H0,D0), carry out goes to C0.
	add	D0, T0
	adc	H0, T1
	adc	H1, T2
	adc	H2, T3
	adc	H3, T4
	adc	D4, T5
	adc	$0, C0

	C Remains to add in C2 and C0
	C                         C0  C0<<32  (-2^32+1)C0
	C    C2  C2<<32  (-2^32+1)C2
	C where C2 is always positive, while C0 may be -1.
	mov	C0, H0
	mov	C0, H1
	mov	C0, H2
	sar	$63, C0		C Get sign
	shl	$32, H1
	sub	H1, H0		C Gives borrow iff C0 > 0
	sbb	$0, H1
	add	C0, H2

	C Add the two low limbs of the C0 contribution now; the carry is
	C pushed on into H2 and C0, which are combined with the C2
	C contribution below.
	add	H0, T0
	adc	H1, T1
	adc	$0, H2
	adc	$0, C0

	C Set (H1 H0)  <-- C2 << 96 - C2 << 32 + 1
	mov	C2, H0
	mov	C2, H1
	shl	$32, H1
	sub	H1, H0
	sbb	$0, H1

	add	H2, H0
	adc	C0, H1
	adc	C2, C0
	mov	C0, H2
	sar	$63, C0			C C0 = sign of the value now held in H2
	add	H0, T2
	adc	H1, T3
	adc	H2, T4
	adc	C0, T5
	sbb	C0, C0			C C0 = -carry out of T5 (0 or -1)

	C Final unlikely carry
	C Build the same (C0, C0<<32, (-2^32+1)C0) pattern as above from the
	C 0 / -1 value in C0; it is subtracted from T while storing.
	mov	C0, H0
	mov	C0, H1
	mov	C0, H2
	sar	$63, C0
	shl	$32, H1
	sub	H1, H0
	sbb	$0, H1
	add	C0, H2

	C D0 is no longer needed; restore the pointer for the stores.
	pop	RP

	C Subtract and store limb by limb.  The stores are interleaved with
	C the sbb chain; mov does not change the carry flag.
	sub	H0, T0
	mov	T0, (RP)
	sbb	H1, T1
	mov	T1, 8(RP)
	sbb	H2, T2
	mov	T2, 16(RP)
	sbb	C0, T3
	mov	T3, 24(RP)
	sbb	C0, T4
	mov	T4, 32(RP)
	sbb	C0, T5
	mov	T5, 40(RP)

	C Restore saved registers in reverse order of the pushes.
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx

	W64_EXIT(2, 0)
	ret
EPILOGUE(nettle_ecc_384_modp)
