Commit 10e0615f authored by Niels Möller
Browse files

Reduce number of additions for x86_64 ecc_192_modp.

parent 190c1584
2013-02-28 Niels Möller <nisse@lysator.liu.se> 2013-02-28 Niels Möller <nisse@lysator.liu.se>
* x86_64/ecc-192-modp.asm: Reorganized to reduce number of
additions. Use setc instruction.
* examples/Makefile.in: Let $(HOGWEED_TARGETS) depend on * examples/Makefile.in: Let $(HOGWEED_TARGETS) depend on
../libhogweed.a. ../libhogweed.a.
......
...@@ -20,50 +20,53 @@ C MA 02111-1301, USA. ...@@ -20,50 +20,53 @@ C MA 02111-1301, USA.
.file "ecc-192-modp.asm" .file "ecc-192-modp.asm"
define(<RP>, <%rsi>) define(<RP>, <%rsi>)
define(<T1>, <%rdi>) C Overlaps unused ecc input define(<T0>, <%rdi>) C Overlaps unused ecc input
define(<T2>, <%rcx>) define(<T1>, <%rcx>)
define(<T3>, <%rdx>) define(<T2>, <%rdx>)
define(<T4>, <%r8>) define(<T3>, <%r8>)
define(<T5>, <%r9>) define(<H>, <%r9>)
define(<T6>, <%r10>) define(<C1>, <%r10>)
define(<C2>, <%r11>)
C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp) C ecc_192_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
.text .text
ALIGN(4) ALIGN(4)
PROLOGUE(nettle_ecc_192_modp) PROLOGUE(nettle_ecc_192_modp)
W64_ENTRY(2, 0) W64_ENTRY(2, 0)
C First: (B+1)*{r5, r4} < B^3 + B^2 - B mov 16(RP), T2
mov 32(RP), T1 mov 24(RP), T3
mov 40(RP), T2 mov 40(RP), H
mov T2, T3 xor C1, C1
xor T4, T4 xor C2, C2
add T1, T2
adc $0, T3
adc $0, T4
add 8(RP), T1 add H, T2
adc 16(RP), T2 adc H, T3
adc 24(RP), T3 C Carry to be added in at T1 and T2
adc $0, T4 setc LREG(C2)
C Sum is < 2B^4 + B^3 - B - 1, so {T4, T3} < 3B
C Next: (B+1) * {T4, T3} < 3B^2 + 2B mov 8(RP), T1
mov T4, T5 mov 32(RP), H
add T3, T4 adc H, T1
adc $0, T5 adc H, T2
C Carry to be added in at T0 and T1
setc LREG(C1)
xor T6, T6 mov (RP), T0
add (RP), T3 adc T3, T0
adc T4, T1 adc T3, T1
adc T5, T2 adc $0, C2
adc $0, T6
C Fold in final carry. C Add in C1 and C2
add T6, T3 add C1, T1
adc T6, T1 adc C2, T2
setc LREG(C1)
C Fold final carry.
adc $0, T0
adc C1, T1
adc $0, T2 adc $0, T2
mov T3, (RP) mov T0, (RP)
mov T1, 8(RP) mov T1, 8(RP)
mov T2, 16(RP) mov T2, 16(RP)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment