Commit 10e0615f authored by Niels Möller
Browse files

Reduce number of additions for x86_64 ecc_192_modp.

parent 190c1584
2013-02-28 Niels Möller <nisse@lysator.liu.se>
* x86_64/ecc-192-modp.asm: Reorganized to reduce number of
additions. Use setc instruction.
* examples/Makefile.in: Let $(HOGWEED_TARGETS) depend on
../libhogweed.a.
......
...@@ -20,50 +20,53 @@ C MA 02111-1301, USA. ...@@ -20,50 +20,53 @@ C MA 02111-1301, USA.
.file "ecc-192-modp.asm" .file "ecc-192-modp.asm"
define(<RP>, <%rsi>) define(<RP>, <%rsi>)
define(<T1>, <%rdi>) C Overlaps unused ecc input define(<T0>, <%rdi>) C Overlaps unused ecc input
define(<T2>, <%rcx>) define(<T1>, <%rcx>)
define(<T3>, <%rdx>) define(<T2>, <%rdx>)
define(<T4>, <%r8>) define(<T3>, <%r8>)
define(<T5>, <%r9>) define(<H>, <%r9>)
define(<T6>, <%r10>) define(<C1>, <%r10>)
define(<C2>, <%r11>)
C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp) C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
.text .text
ALIGN(4) ALIGN(4)
PROLOGUE(nettle_ecc_192_modp) PROLOGUE(nettle_ecc_192_modp)
W64_ENTRY(2, 0) W64_ENTRY(2, 0)
C First: (B+1)*{r5, r4} < B^3 + B^2 - B mov 16(RP), T2
mov 32(RP), T1 mov 24(RP), T3
mov 40(RP), T2 mov 40(RP), H
mov T2, T3 xor C1, C1
xor T4, T4 xor C2, C2
add T1, T2
adc $0, T3
adc $0, T4
add 8(RP), T1 add H, T2
adc 16(RP), T2 adc H, T3
adc 24(RP), T3 C Carry to be added in at T1 and T2
adc $0, T4 setc LREG(C2)
C Sum is < 2B^4 + B^3 - B - 1, so {T4, T3} < 3B
mov 8(RP), T1
mov 32(RP), H
adc H, T1
adc H, T2
C Carry to be added in at T0 and T1
setc LREG(C1)
mov (RP), T0
adc T3, T0
adc T3, T1
adc $0, C2
C Next: (B+1) * {T4, T3} < 3B^2 + 2B C Add in C1 and C2
mov T4, T5 add C1, T1
add T3, T4 adc C2, T2
adc $0, T5 setc LREG(C1)
xor T6, T6 C Fold final carry.
add (RP), T3 adc $0, T0
adc T4, T1 adc C1, T1
adc T5, T2
adc $0, T6
C Fold in final carry.
add T6, T3
adc T6, T1
adc $0, T2 adc $0, T2
mov T3, (RP) mov T0, (RP)
mov T1, 8(RP) mov T1, 8(RP)
mov T2, 16(RP) mov T2, 16(RP)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment