From 3be646d1cdbffbceef956de554a005320a1aa63d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se>
Date: Fri, 12 Apr 2013 12:22:56 +0200
Subject: [PATCH] ARM assembly for umac_nh.

---
 ChangeLog         |  3 ++
 armv7/machine.m4  | 38 ++++++++++++++++++++
 armv7/umac-nh.asm | 91 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 132 insertions(+)
 create mode 100644 armv7/umac-nh.asm

diff --git a/ChangeLog b/ChangeLog
index 86f75f11..7a163fe7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2013-04-12 Niels Möller <nisse@lysator.liu.se>
 
+	* armv7/umac-nh.asm: New file. 2.1 time speedup.
+	* armv7/machine.m4 (D0REG, D1REG): New macros.
+
 	* configure.ac (asm_replace_list): Added umac-nh.asm and
 	umac-nh-n.asm.
 
diff --git a/armv7/machine.m4 b/armv7/machine.m4
index a829c76e..f982a66a 100644
--- a/armv7/machine.m4
+++ b/armv7/machine.m4
@@ -16,3 +16,41 @@ define(<QREG>, <ifelse(
 	$1, d28, q14,
 	$1, d30, q15,
 	<NO REGISTER>)>)dnl
+dnl D0REG(qN) expands to d(2N), the low half of qN; other arguments give NO REGISTER.
+define(<D0REG>, <ifelse(
+	$1, q0, d0,
+	$1, q1, d2,
+	$1, q2, d4,
+	$1, q3, d6,
+	$1, q4, d8,
+	$1, q5, d10,
+	$1, q6, d12,
+	$1, q7, d14,
+	$1, q8, d16,
+	$1, q9, d18,
+	$1, q10, d20,
+	$1, q11, d22,
+	$1, q12, d24,
+	$1, q13, d26,
+	$1, q14, d28,
+	$1, q15, d30,
+	<NO REGISTER>)>)dnl
+dnl D1REG(qN) expands to d(2N+1), the high half of qN; other arguments give NO REGISTER.
+define(<D1REG>, <ifelse(
+	$1, q0, d1,
+	$1, q1, d3,
+	$1, q2, d5,
+	$1, q3, d7,
+	$1, q4, d9,
+	$1, q5, d11,
+	$1, q6, d13,
+	$1, q7, d15,
+	$1, q8, d17,
+	$1, q9, d19,
+	$1, q10, d21,
+	$1, q11, d23,
+	$1, q12, d25,
+	$1, q13, d27,
+	$1, q14, d29,
+	$1, q15, d31,
+	<NO REGISTER>)>)dnl
diff --git a/armv7/umac-nh.asm b/armv7/umac-nh.asm
new file mode 100644
index 00000000..7c0a0290
--- /dev/null
+++ b/armv7/umac-nh.asm
@@ -0,0 +1,91 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2013 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version
2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+	.file "umac-nh.asm"
+	.fpu neon
+C Core registers: KEY, LENGTH and MSG are inputs (r0-r2); SHIFT (r3) is scratch.
+define(<KEY>, <r0>)
+define(<LENGTH>, <r1>)
+define(<MSG>, <r2>)
+define(<SHIFT>, <r3>)
+C NEON: QA/QB message, DM carried doubleword, QLEFT/QRIGHT shift counts, QACC sum, QT0/QT1 scratch, QK0/QK1 key.
+define(<QA>, <q0>)
+define(<QB>, <q1>)
+define(<DM>, <d16>)
+define(<QLEFT>, <q9>)
+define(<QRIGHT>, <q10>)
+define(<QACC>, <q11>)
+define(<QT0>, <q12>)
+define(<QT1>, <q13>)
+define(<QK0>, <q14>)
+define(<QK1>, <q15>)
+C _nettle_umac_nh: per 32-byte block with 32-bit words w[0..7] and key words k[0..7], adds (w[i]+k[i]) * (w[i+4]+k[i+4]), i = 0..3.
+	.text
+	.align 3
+C Sums are 32-bit, products and total 64-bit. At least one block is always processed. Result in r0 (low), r1 (high).
+PROLOGUE(_nettle_umac_nh)
+	C Setup for 64-bit aligned reads. SHIFT is the byte offset of MSG within its doubleword, or 8 if MSG is already aligned.
+	ands	SHIFT, MSG, #7
+	and	MSG, MSG, #-8
+	vld1.8	{DM}, [MSG :64]
+	addne	MSG, MSG, #8
+	addeq	SHIFT, SHIFT, #8
+	C SHIFT <-- -8 * offset; vshl reads the count from the low byte of each lane, negative counts shift right
+	C FIXME: Combine as rsb ?
+	lsl	SHIFT, SHIFT, #3
+	neg	SHIFT, SHIFT
+
+	C Right shift in QRIGHT (both halves)
+	vmov.i32 D0REG(QRIGHT)[0], SHIFT
+	vmov.32	D1REG(QRIGHT), D0REG(QRIGHT)
+	add	SHIFT, SHIFT, #64
+	C Left shift (64 - 8 * offset bits) in QLEFT (both halves)
+	vmov.i32 D0REG(QLEFT)[0], SHIFT
+	vmov.32	D1REG(QLEFT), D0REG(QLEFT)
+
+	vmov.i64 QACC, #0
+	C DM holds the previous doubleword, already shifted right, from one iteration to the next
+	vshl.u64 DM, DM, D0REG(QRIGHT)
+.Loop:
+	C Set m[i] <-- m[i-1] >> RSHIFT + m[i] << LSHIFT
+	vld1.8	{QA, QB}, [MSG :64]!
+	vshl.u64 QT0, QA, QRIGHT
+	vshl.u64 QT1, QB, QRIGHT
+	vshl.u64 QA, QA, QLEFT
+	vshl.u64 QB, QB, QLEFT
+	veor	D0REG(QA), D0REG(QA), DM
+	veor	D1REG(QA), D1REG(QA), D0REG(QT0)
+	veor	D0REG(QB), D0REG(QB), D1REG(QT0)
+	veor	D1REG(QB), D1REG(QB), D0REG(QT1)
+	vmov	DM, D1REG(QT1)
+	C Add 32 bytes of key to the realigned block, multiply QA by QB lane-wise (32x32->64) and accumulate
+	vld1.i32 {QK0, QK1}, [KEY]!
+	vadd.i32 QA, QA, QK0
+	vadd.i32 QB, QB, QK1
+	vmull.u32 QT0, D0REG(QA), D0REG(QB)
+	vmull.u32 QT1, D1REG(QA), D1REG(QB)
+	subs	LENGTH, LENGTH, #32
+	vadd.i64 QACC, QACC, QT0
+	vadd.i64 QACC, QACC, QT1
+	bhi	.Loop
+	C Fold the two 64-bit lanes of QACC and return the sum in r0 (low), r1 (high)
+	vadd.i64 D0REG(QACC), D0REG(QACC), D1REG(QACC)
+	vmov	r0, r1, D0REG(QACC)
+	bx	lr
+EPILOGUE(_nettle_umac_nh)
-- 
GitLab