From f6360a087252e637e06ec9ccdccf5c5462b95fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= Date: Thu, 16 May 2013 16:28:37 +0200 Subject: [PATCH] arm: Added AES code for pre-v6 processors. --- ChangeLog | 8 +- arm/aes-decrypt-internal.asm | 167 +++++++++++++++++++++++++++++++++ arm/aes-encrypt-internal.asm | 176 +++++++++++++++++++++++++++++++++++ arm/aes.m4 | 18 ++++ 4 files changed, 367 insertions(+), 2 deletions(-) create mode 100644 arm/aes-decrypt-internal.asm create mode 100644 arm/aes-encrypt-internal.asm diff --git a/ChangeLog b/ChangeLog index 5cc88696..96429e1c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ 2013-05-16 Niels Möller - * arm/aes.m4 (AES_FINAL_ROUND_V6): New name, updated callers. - (AES_FINAL_ROUND_): ... old name. Also eliminated one uxtb + * arm/aes-encrypt-internal.asm: New file, for pre-v6 processors. + * arm/aes-decrypt-internal.asm: New file, likewise. + + * arm/aes.m4 (AES_FINAL_ROUND_V5): Variant without using uxtb. + (AES_FINAL_ROUND_V6): New name, updated callers. + (AES_FINAL_ROUND): ... old name. Also eliminated one uxtb instruction. (AES_ENCRYPT_ROUND, AES_DECRYPT): Moved macros to the files using them. diff --git a/arm/aes-decrypt-internal.asm b/arm/aes-decrypt-internal.asm new file mode 100644 index 00000000..37abf1ec --- /dev/null +++ b/arm/aes-decrypt-internal.asm @@ -0,0 +1,167 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013 Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. 
+C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. + +include_src() + +C define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + +define(, ) C Overlaps LENGTH, SRC, DST +define(, ) +define(, ) +define(, ) C lr +define(, ) C Overlaps CTX input +define(, <[sp]>) + + +define(, < + and T0, MASK, $1, lsl #2 + ldr $5, [TABLE, T0] + and T0, MASK, $2, lsl #2 + ldr $6, [TABLE, T0] + and T0, MASK, $3, lsl #2 + ldr $7, [TABLE, T0] + and T0, MASK, $4, lsl #2 + ldr $8, [TABLE, T0] + + and T0, MASK, $4, ror #6 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $1, ror #6 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $2, ror #6 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $3, ror #6 + ldr T0, [TABLE, T0] + eor $8, $8, T0 + + and T0, MASK, $3, ror #14 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $4, ror #14 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $1, ror #14 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $2, ror #14 + ldr T0, [TABLE, T0] + eor $8, $8, T0 + + and T0, MASK, $2, ror #22 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $3, ror #22 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $4, ror #22 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $1, ror #22 + ldr T0, [TABLE, T0] + + ldm $9!, {$1,$2,$3,$4} + eor $8, $8, T0 + sub TABLE, TABLE, #3072 + eor $5, $5, $1 + eor $6, $6, $2 + eor $7, $7, $3 + eor $8, $8, $4 +>) + + .file "aes-decrypt-internal.asm" + + C _aes_decrypt(struct aes_context *ctx, + C const struct aes_table *T, + C size_t length, uint8_t *dst, + C uint8_t *src) + .text + ALIGN(4) +PROLOGUE(_nettle_aes_decrypt) + teq 
LENGTH, #0 + beq .Lend + ldr SRC, [sp] + + push {r0, r4,r5,r6,r7,r8,r10,r11,lr} + mov MASK, #0x3fc + ALIGN(16) +.Lblock_loop: + ldr KEY, CTX + ldr ROUND, [KEY, #+AES_NROUNDS] + AES_LOAD(SRC,KEY,W0) + AES_LOAD(SRC,KEY,W1) + AES_LOAD(SRC,KEY,W2) + AES_LOAD(SRC,KEY,W3) + + push {LENGTH, DST, SRC} + add TABLE, TABLE, #AES_TABLE0 + + b .Lentry + ALIGN(16) +.Lround_loop: + C Transform X -> W + AES_DECRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY) + +.Lentry: + subs ROUND, ROUND,#2 + C Transform W -> X + AES_DECRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY) + + bne .Lround_loop + + lsr ROUND, MASK, #2 C Put the needed mask in the unused ROUND register + sub TABLE, TABLE, #AES_TABLE0 + C Final round + AES_FINAL_ROUND_V5(X0, X3, X2, X1, KEY, W0, ROUND) + AES_FINAL_ROUND_V5(X1, X0, X3, X2, KEY, W1, ROUND) + AES_FINAL_ROUND_V5(X2, X1, X0, X3, KEY, W2, ROUND) + AES_FINAL_ROUND_V5(X3, X2, X1, X0, KEY, W3, ROUND) + + pop {LENGTH, DST, SRC} + + AES_STORE(DST,W0) + AES_STORE(DST,W1) + AES_STORE(DST,W2) + AES_STORE(DST,W3) + + subs LENGTH, LENGTH, #16 + bhi .Lblock_loop + + add sp, sp, #4 C Drop saved r0 + pop {r4,r5,r6,r7,r8,r10,r11,pc} + +.Lend: + bx lr +EPILOGUE(_nettle_aes_decrypt) diff --git a/arm/aes-encrypt-internal.asm b/arm/aes-encrypt-internal.asm new file mode 100644 index 00000000..eb2f1489 --- /dev/null +++ b/arm/aes-encrypt-internal.asm @@ -0,0 +1,176 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013 Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. 
+C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. + +include_src() + +C Benchmarked at 725, 930, 990 cycles/block on cortex A9, +C for 128, 192 and 256 bit key sizes. + +C Possible improvements: More efficient load and store with +C aligned accesses. Better scheduling. + +C define(, ) +define(
, ) +define(, ) +define(, ) +define(, ) + +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) +define(, ) + +define(, ) C Overlaps LENGTH, SRC, DST +define(, ) +define(, ) +define(, ) C lr +define(, ) C Overlaps CTX input +define(, <[sp]>) + + +C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key) +C MASK should hold the constant 0x3fc. +define(, < + + and T0, MASK, $1, lsl #2 + ldr $5, [TABLE, T0] + and T0, MASK, $2, lsl #2 + ldr $6, [TABLE, T0] + and T0, MASK, $3, lsl #2 + ldr $7, [TABLE, T0] + and T0, MASK, $4, lsl #2 + ldr $8, [TABLE, T0] + + and T0, MASK, $2, ror #6 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $3, ror #6 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $4, ror #6 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $1, ror #6 + ldr T0, [TABLE, T0] + eor $8, $8, T0 + + and T0, MASK, $3, ror #14 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $4, ror #14 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $1, ror #14 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $2, ror #14 + ldr T0, [TABLE, T0] + eor $8, $8, T0 + + and T0, MASK, $4, ror #22 + add TABLE, TABLE, #1024 + ldr T0, [TABLE, T0] + eor $5, $5, T0 + and T0, MASK, $1, ror #22 + ldr T0, [TABLE, T0] + eor $6, $6, T0 + and T0, MASK, $2, ror #22 + ldr T0, [TABLE, T0] + eor $7, $7, T0 + and T0, MASK, $3, ror #22 + ldr T0, [TABLE, T0] + + ldm $9!, {$1,$2,$3,$4} + eor $8, $8, T0 + sub TABLE, TABLE, #3072 + eor $5, $5, $1 + eor $6, $6, $2 + eor $7, $7, $3 + eor $8, $8, $4 +>) + + .file "aes-encrypt-internal.asm" + + C _aes_encrypt(struct aes_context *ctx, + C const struct aes_table *T, + C size_t length, uint8_t *dst, + C uint8_t *src) + .text + ALIGN(4) +PROLOGUE(_nettle_aes_encrypt) + teq LENGTH, #0 + beq .Lend + ldr SRC, [sp] + + push {r0, r4,r5,r6,r7,r8,r10,r11,lr} + mov MASK, #0x3fc + ALIGN(16) +.Lblock_loop: + ldr KEY, CTX + ldr ROUND, [KEY, #+AES_NROUNDS] + AES_LOAD(SRC,KEY,W0) + 
AES_LOAD(SRC,KEY,W1) + AES_LOAD(SRC,KEY,W2) + AES_LOAD(SRC,KEY,W3) + + push {LENGTH, DST, SRC} + add TABLE, TABLE, #AES_TABLE0 + + b .Lentry + ALIGN(16) +.Lround_loop: + C Transform X -> W + AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY) + +.Lentry: + subs ROUND, ROUND,#2 + C Transform W -> X + AES_ENCRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY) + + bne .Lround_loop + + lsr ROUND, MASK, #2 C Put the needed mask in the unused ROUND register + sub TABLE, TABLE, #AES_TABLE0 + C Final round + AES_FINAL_ROUND_V5(X0, X1, X2, X3, KEY, W0, ROUND) + AES_FINAL_ROUND_V5(X1, X2, X3, X0, KEY, W1, ROUND) + AES_FINAL_ROUND_V5(X2, X3, X0, X1, KEY, W2, ROUND) + AES_FINAL_ROUND_V5(X3, X0, X1, X2, KEY, W3, ROUND) + + pop {LENGTH, DST, SRC} + + AES_STORE(DST,W0) + AES_STORE(DST,W1) + AES_STORE(DST,W2) + AES_STORE(DST,W3) + + subs LENGTH, LENGTH, #16 + bhi .Lblock_loop + + add sp, sp, #4 C Drop saved r0 + pop {r4,r5,r6,r7,r8,r10,r11,pc} + +.Lend: + bx lr +EPILOGUE(_nettle_aes_encrypt) diff --git a/arm/aes.m4 b/arm/aes.m4 index a509b754..91f340a1 100644 --- a/arm/aes.m4 +++ b/arm/aes.m4 @@ -38,3 +38,21 @@ define(, < ldr T0, [$5], #+4 eor $6, $6, T0 >) + +C AES_FINAL_ROUND_V5(a,b,c,d,key,res,mask) +C Avoids the uxtb instruction, introduced in ARMv6. +C The mask argument should hold the constant 0xff +define(, < + and T0, $7, $1 + ldrb $6, [TABLE, T0] + and T0, $7, $2, ror #8 + ldrb T0, [TABLE, T0] + eor $6, $6, T0, lsl #8 + and T0, $7, $3, ror #16 + ldrb T0, [TABLE, T0] + eor $6, $6, T0, lsl #16 + ldrb T0, [TABLE, $4, lsr #24] + eor $6, $6, T0, lsl #24 + ldr T0, [$5], #+4 + eor $6, T0 +>) -- GitLab