Commit 59eb1aa1 authored by Niels Möller

x86_64 assembly for ecc_224_modp.

parent 933c4cef
2013-03-06 Niels Möller <nisse@lysator.liu.se>
* x86_64/ecc-224-modp.asm: New file, a 5x speedup over the C
version.
2013-03-05 Niels Möller <nisse@lysator.liu.se>
* configure.ac (asm_optional_list): Added ecc-521-modp.asm.
......
C nettle, low-level cryptographic library
C
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
C License for more details.
C
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB. If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.
C Source-file name directive, followed by the m4 register aliases used by
C nettle_ecc_224_modp. Each alias names a different 64-bit register.
.file "ecc-224-modp.asm"
C RP: base address for every load (byte offsets 0..56) and every store
C (byte offsets 0..24). It is never written.
define(<RP>, <%rsi>)
C T0, T1: scratch while building the shifted copies, otherwise the two
C lowest limbs of the value currently being reduced.
define(<T0>, <%rdi>) C Overlaps unused ecc input
define(<T1>, <%rcx>)
C H0, H1, H2: the limbs directly above T1:T0 in the running value. H2 is
C zeroed explicitly and then collects the carries and borrows.
define(<H0>, <%rax>)
define(<H1>, <%rdx>)
define(<H2>, <%r8>)
C F0, F1, F2: temporaries holding the shifted/masked copies that each
C folding step subtracts or adds.
define(<F0>, <%r9>)
define(<F1>, <%r10>)
define(<F2>, <%r11>)
C nettle_ecc_224_modp
C
C Reduces the eight 64-bit limbs x0..x7 stored at RP (byte offsets 0..56,
C lowest offset = least significant limb) to four limbs, which are written
C back to byte offsets 0..24 of RP. Offsets 32..56 are read, not written.
C
C The three folding steps below all use the congruence
C   2^224 = 2^96 - 1,   i.e. reduction modulo p = 2^224 - 2^96 + 1.
C There is no final compare-and-subtract, so the code shown here only
C establishes that the output is congruent to the input mod p, not that it
C is the canonical representative below p.
C
C Registers: RP (%rsi) is only read. %rax, %rcx, %rdx, %rdi, %r8-%r11 and
C the flags are overwritten.
C NOTE(review): PROLOGUE, EPILOGUE, W64_ENTRY, W64_EXIT and XREG are macros
C defined outside this file. W64_ENTRY(2, 0) / W64_EXIT(2, 0) presumably
C adapt a two-argument Windows x64 call to the register assignment used
C here; confirm against the macro definitions.
PROLOGUE(nettle_ecc_224_modp)
W64_ENTRY(2, 0)
C Step 1: fold the top two limbs H = x7:x6 (weight 2^384).
C 2^384 = 2^256 - 2^160 (mod p), so H is kept in H1:H0 (now read as weight
C 2^256) and H << 32 is subtracted at weight 2^128.
mov 48(RP), H0
mov 56(RP), H1
C Set (F2,F1,F0) <-- (H1,H0) << 32
mov H0, F0
mov H0, F1
shl $32, F0
shr $32, F1
mov H1, F2
mov H1, T0
shl $32, T0
shr $32, F2
or T0, F1
C H2 starts at zero; it receives the carry out of the additions below.
xor H2, H2
C (H1,H0,T1,T0) <-- (x7,x6,x3,x2) - (0,F2,F1,F0)
mov 16(RP), T0
mov 24(RP), T1
sub F0, T0
sbb F1, T1
sbb F2, H0
sbb $0, H1 C No further borrow
C The adc chain relies on the carry flag being clear after the sbb above,
C so it acts as a plain addition of x5:x4 into H1:H0, carry out into H2.
adc 32(RP), H0
adc 40(RP), H1
adc $0, H2
C Step 2: fold H2:H1:H0 (weight 2^256) using 2^256 = 2^128 - 2^32 (mod p):
C add it to T1:T0 so the sum sits at weight 2^128, and subtract the
C pre-addition value shifted left by 32 bits at weight 2^0.
C Set (F2,F1,F0) <-- (H2,H1,H0) << 32
C To free registers, add in T1, T0 as soon as H0, H1 have been copied
mov H0, F0
mov H0, F1
add T0, H0
mov H1, F2
mov H1, T0
adc T1, H1
C mov leaves the flags untouched, so the add/adc chain is not broken by
C the interleaved copies. T1 takes H2 before the carry is added to it.
mov H2, T1
adc $0, H2
C Shift 32 bits
shl $32, F0
shr $32, F1
shl $32, T0
shr $32, F2
shl $32, T1
or T0, F1
or T1, F2
C Load x1:x0 as the new low limbs and subtract the shifted copy from the
C five-limb value (H2,H1,H0,T1,T0).
mov (RP), T0
mov 8(RP), T1
sub F0, T0
sbb F1, T1
sbb F2, H0
sbb $0, H1
sbb $0, H2
C Step 3: with c = (H2,H1) >> 32, the part at weight 2^224 and above,
C replace c * 2^224 by c * 2^96 - c. (F2,F1) equals c << 32, which
C added at limb position 1 (weight 2^64) contributes c * 2^96.
C We now have H2, H1, H0, T1, T0, with 33 bits left to reduce
C Set F0 <-- (H2, H1) >> 32
C Set (F2,F1) <-- (H2, H1 & 0xffffffff00000000)
C H1 <-- H1 & 0xffffffff
mov H1, F0
mov H1, F1
mov H2, F2
movl XREG(H1), XREG(H1) C Clears high 32 bits
sub H1, F1 C Clears low 32 bits
shr $32, F0
shl $32, H2
or H2, F0
C Subtract c from the lowest limb. The borrow is taken out of (F2,F1)
C before that pair is added in, so it is applied at weight 2^64.
sub F0, T0
sbb $0, F1
sbb $0, F2
C Add (F2,F1) starting at T1 and propagate the carry up into H1.
add F1, T1
adc F2, H0
adc $0, H1
C Store the four result limbs over the low half of the input.
mov T0, (RP)
mov T1, 8(RP)
mov H0, 16(RP)
mov H1, 24(RP)
W64_EXIT(2, 0)
ret
EPILOGUE(nettle_ecc_224_modp)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment