Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Wim Lewis
nettle
Commits
fa269b6a
Commit
fa269b6a
authored
Dec 15, 2015
by
Niels Möller
Browse files
Fix carry folding bug in x86_64 ecc_384_modp. Problem reported by Hanno Böck.
parent
c71d2c9d
Changes
2
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
fa269b6a
2015-12-15 Niels Möller <nisse@lysator.liu.se>
* x86_64/ecc-384-modp.asm: Fixed carry propagation bug. Problem
reported by Hanno Böck. Simplified the folding to always use a
non-negative carry; the old code attempted to add in a carry which
could be either positive or negative, but didn't get that case
right.
2015-12-10 Niels Möller <nisse@lysator.liu.se>
* ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
...
...
x86_64/ecc-384-modp.asm
View file @
fa269b6a
C
x86_64
/
ecc
-
384
-
modp.asm
ifelse
(
<
Copyright
(
C
)
2013
Niels
Möller
Copyright
(
C
)
2013
,
2015
Niels
Möller
This
file
is
part
of
GNU
Nettle.
...
...
@@ -33,7 +33,7 @@ ifelse(<
.file
"ecc-384-modp.asm"
define
(
<
RP
>
,
<%
rsi
>
)
define
(
<
D
4
>
,
<%
rax
>
)
define
(
<
D
5
>
,
<%
rax
>
)
define
(
<
T0
>
,
<%
rbx
>
)
define
(
<
T1
>
,
<%
rcx
>
)
define
(
<
T2
>
,
<%
rdx
>
)
...
...
@@ -48,8 +48,8 @@ define(<H4>, <%r13>)
define
(
<
H5
>
,
<%
r14
>
)
define
(
<
C2
>
,
<%
r15
>
)
define
(
<
C0
>
,
H5
)
C
Overlap
define
(
<
D0
>
,
RP
)
C
Overlap
define
(
<
TMP
>
,
H4
)
C
Overlap
define
(
<
TMP
>
,
RP
)
C
Overlap
PROLOGUE
(
nettle_ecc_384_modp
)
W64_ENTRY
(
2
,
0
)
...
...
@@ -61,34 +61,38 @@ PROLOGUE(nettle_ecc_384_modp)
push
%
r14
push
%
r15
C
First
get
top
2
limbs
,
which
need
folding
twice
C
First
get
top
2
limbs
,
which
need
folding
twice.
C
B
^
10
=
B
^
6
+
B
^
4
+
2
^
32
(
B
-
1
)
B
^
4
.
C
We
handle
the
terms
as
follows
:
C
C
H5
H4
C
-
H5
C
------
C
H0
D4
C
B
^
6
:
Folded
immediately.
C
C
Then
shift
right
,
(
H1
,
H0
,
D4
)
<--
(
H0
,
D4
)
<<
32
C
and
add
C
B
^
4
:
Delayed
,
added
in
in
the
next
folding.
C
C
H5
H4
C
H1
H0
C
----------
C
C2
H1
H0
mov
80
(
RP
),
D4
mov
88
(
RP
),
H0
mov
D4
,
H4
mov
H0
,
H5
sub
H0
,
D4
sbb
$
0
,
H0
mov
D4
,
T2
mov
H0
,
H1
shl
$
32
,
H0
shr
$
32
,
T2
C
2
^
32
(
B
-
1
)
B
^
4
:
Low
half
limb
delayed
until
the
next
C
folding.
Top
1.5
limbs
subtracted
and
shifted
now
,
resulting
C
in
2.5
limbs.
The
low
limb
saved
in
D5
,
high
1.5
limbs
added
C
in.
mov
80
(
RP
),
H4
mov
88
(
RP
),
H5
C
Shift
right
32
bits
,
into
H1
,
H0
mov
H4
,
H0
mov
H5
,
H1
mov
H5
,
D5
shr
$
32
,
H1
or
T2
,
H0
shl
$
32
,
D5
shr
$
32
,
H0
or
D5
,
H0
C
H1
H0
C
-
H1
H0
C
--------
C
H1
H0
D5
mov
H0
,
D5
neg
D5
sbb
H1
,
H0
sbb
$
0
,
H1
xor
C2
,
C2
add
H4
,
H0
...
...
@@ -127,118 +131,95 @@ PROLOGUE(nettle_ecc_384_modp)
adc
H3
,
T5
adc
$
0
,
C0
C
H3
H2
H1
H0
0
C
-
H4
H3
H2
H1
H0
C
---------------
C
H3
H2
H1
H0
D0
mov
XREG
(
D4
),
XREG
(
D4
)
mov
H0
,
D0
neg
D0
sbb
H1
,
H0
sbb
H2
,
H1
sbb
H3
,
H2
sbb
H4
,
H3
sbb
$
0
,
D4
C
Shift
right.
High
bits
are
sign
,
to
be
added
to
C0.
mov
D4
,
TMP
sar
$
32
,
TMP
shl
$
32
,
D4
add
TMP
,
C0
C
Shift
left
,
including
low
half
of
H4
mov
H3
,
TMP
shl
$
32
,
H4
shr
$
32
,
TMP
shl
$
32
,
H3
or
TMP
,
D4
or
TMP
,
H4
mov
H2
,
TMP
shl
$
32
,
H3
shr
$
32
,
TMP
shl
$
32
,
H2
or
TMP
,
H3
mov
H1
,
TMP
shl
$
32
,
H2
shr
$
32
,
TMP
shl
$
32
,
H1
or
TMP
,
H2
mov
H0
,
TMP
shl
$
32
,
H1
shr
$
32
,
TMP
shl
$
32
,
H0
or
TMP
,
H1
mov
D0
,
TMP
shr
$
32
,
TMP
shl
$
32
,
D0
or
TMP
,
H0
shl
$
32
,
H0
C
H4
H3
H2
H1
H0
0
C
-
H4
H3
H2
H1
H0
C
---------------
C
H4
H3
H2
H1
H0
TMP
add
D0
,
T0
mov
H0
,
TMP
neg
TMP
sbb
H1
,
H0
sbb
H2
,
H1
sbb
H3
,
H2
sbb
H4
,
H3
sbb
$
0
,
H4
add
TMP
,
T0
adc
H0
,
T1
adc
H1
,
T2
adc
H2
,
T3
adc
H3
,
T4
adc
D
4
,
T5
adc
H
4
,
T5
adc
$
0
,
C0
C
Remains
to
add
in
C2
and
C0
C
C0
C0
<<
32
(
-
2
^
32
+
1
)
C0
C
C2
C2
<<
32
(
-
2
^
32
+
1
)
C2
C
where
C2
is
always
positive
,
while
C0
may
be
-
1
.
C
Set
H1
,
H0
=
(
2
^
96
-
2
^
32
+
1
)
C0
mov
C0
,
H0
mov
C0
,
H1
mov
C0
,
H2
sar
$
63
,
C0
C
Get
sign
shl
$
32
,
H1
sub
H1
,
H0
C
Gives
borrow
iff
C0
>
0
sub
H1
,
H0
sbb
$
0
,
H1
add
C0
,
H2
C
Set
H3
,
H2
=
(
2
^
96
-
2
^
32
+
1
)
C2
mov
C2
,
H2
mov
C2
,
H3
shl
$
32
,
H3
sub
H3
,
H2
sbb
$
0
,
H3
add
C0
,
H2
C
No
carry.
Could
use
lea
trick
xor
C0
,
C0
add
H0
,
T0
adc
H1
,
T1
adc
$
0
,
H2
adc
$
0
,
C0
C
Set
(
H1
H0
)
<--
C2
<<
96
-
C2
<<
32
+
1
mov
C2
,
H0
mov
C2
,
H1
shl
$
32
,
H1
sub
H1
,
H0
sbb
$
0
,
H1
add
H2
,
H0
adc
C0
,
H1
adc
C2
,
C0
mov
C0
,
H2
sar
$
63
,
C0
add
H0
,
T2
adc
H1
,
T3
adc
H2
,
T4
adc
C0
,
T5
sbb
C0
,
C0
adc
H2
,
T2
adc
H3
,
T3
adc
C2
,
T4
adc
D5
,
T5
C
Value
delayed
from
initial
folding
adc
$
0
,
C0
C
Use
sbb
and
switch
sign?
C
Final
unlikely
carry
mov
C0
,
H0
mov
C0
,
H1
mov
C0
,
H2
sar
$
63
,
C0
shl
$
32
,
H1
sub
H1
,
H0
sbb
$
0
,
H1
add
C0
,
H2
pop
RP
sub
H0
,
T0
add
H0
,
T0
mov
T0
,
(
RP
)
sbb
H1
,
T1
adc
H1
,
T1
mov
T1
,
8
(
RP
)
sbb
H2
,
T2
adc
C0
,
T2
mov
T2
,
16
(
RP
)
sbb
C
0
,
T3
adc
$
0
,
T3
mov
T3
,
24
(
RP
)
sbb
C
0
,
T4
adc
$
0
,
T4
mov
T4
,
32
(
RP
)
sbb
C
0
,
T5
adc
$
0
,
T5
mov
T5
,
40
(
RP
)
pop
%
r15
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment