From 226c885c9f971e4e5fa07e10e43c96671fc8352a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= <nisse@lysator.liu.se> Date: Wed, 29 Jun 2011 15:38:15 +0200 Subject: [PATCH] New file. Rev: nettle/x86_64/serpent-decrypt.asm:1.1 --- x86_64/serpent-decrypt.asm | 327 +++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 x86_64/serpent-decrypt.asm diff --git a/x86_64/serpent-decrypt.asm b/x86_64/serpent-decrypt.asm new file mode 100644 index 00000000..6836c7be --- /dev/null +++ b/x86_64/serpent-decrypt.asm @@ -0,0 +1,327 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2011 Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +C MA 02111-1307, USA. 
+
+C Register usage:
+
+C Single block serpent state, two copies (x0-x3 and y0-y3), in 32-bit registers
+define(<x0>, <%eax>)
+define(<x1>, <%ebx>)
+define(<x2>, <%ebp>)
+define(<x3>, <%r8d>)
+
+define(<y0>, <%r9d>)
+define(<y1>, <%r10d>)
+define(<y2>, <%r11d>)
+define(<y3>, <%r12d>)
+
+C Quadruple block serpent state, two copies (xmm registers; not referenced by any code in this revision)
+define(<X0>, <%xmm0>)
+define(<X1>, <%xmm1>)
+define(<X2>, <%xmm2>)
+define(<X3>, <%xmm3>)
+
+define(<Y0>, <%xmm4>)
+define(<Y1>, <%xmm5>)
+define(<Y2>, <%xmm6>)
+define(<Y3>, <%xmm7>)
+
+define(<MINUS1>, <%xmm8>)
+define(<T0>, <%xmm9>)
+define(<T1>, <%xmm10>)
+define(<T2>, <%xmm11>)
+define(<T3>, <%xmm12>)
+
+C Arguments: CTX = ctx, N = length, DST = dst, SRC = src of the prototype comment further down
+define(<CTX>, <%rdi>)
+define(<N>, <%rsi>)
+define(<DST>, <%rdx>)
+define(<SRC>, <%rcx>)
+
+define(<CNT>, <%r13>)	C not referenced by the code in this revision
+define(<TMP32>, <%r14d>)	C scratch register, clobbered by LTI
+
+C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
+C SBOXnI computes the inverse of Serpent S-box n in bitsliced form; $1 and $5 carry the least significant bit
+define(<SBOX0I>, <
+	mov	$1, $5
+	xor	$3, $5
+	mov	$1, $7
+	or	$2, $7
+	mov	$3, $6
+	xor	$4, $6
+	xor	$6, $7
+	and	$3, $6
+	or	$2, $3
+	xor	$4, $2
+	or	$1, $6
+	and	$3, $2
+	xor	$2, $6
+	or	$7, $1
+	xor	$6, $1
+	mov	$7, $2
+	and	$1, $2	C x1 = y2 & x0; an xor here gives a wrong y0
+	not	$7
+	or	$7, $4
+	xor	$3, $4
+	mov	$1, $8
+	xor	$4, $8
+	or	$4, $2
+	xor	$2, $5
+>)
+
+define(<SBOX1I>, <
+	mov	$2, $6
+	or	$4, $6
+	xor	$3, $6	C y1 = (x1 | x3) ^ x2; x2 must stay intact for the later uses
+	mov	$1, $8
+	xor	$2, $8
+	mov	$1, $5
+	or	$6, $5
+	and	$8, $5
+	xor	$5, $2
+	xor	$6, $8
+	and	$4, $2
+	mov	$1, $7
+	and	$3, $7
+	or	$7, $6
+	or	$4, $7
+	xor	$5, $7
+	not	$7
+	xor	$2, $6
+	xor	$6, $5
+	xor	$3, $5
+	or	$7, $1
+	xor	$1, $5
+>)
+
+define(<SBOX2I>, <
+	mov	$1, $5
+	xor	$4, $5
+	mov	$3, $7
+	xor	$4, $7
+	mov	$2, $6
+	or	$7, $6
+	xor	$6, $5
+	mov	$4, $6
+	or	$5, $6
+	and	$2, $6
+	not	$4
+	mov	$1, $8
+	or	$3, $8
+	and	$8, $7
+	xor	$7, $6
+	and	$2, $8
+	and	$3, $1
+	or	$4, $1
+	xor	$1, $8
+	and	$8, $3
+	xor	$1, $3
+	mov	$5, $7
+	xor	$6, $7
+	xor	$3, $7
+>)
+
+define(<SBOX3I>, <
+	mov	$3, $8
+	or	$4, $8
+	mov	$2, $5
+	and	$8, $5
+	mov	$1, $7
+	or	$4, $7
+	mov	$3, $6
+	xor	$7, $6
+	xor	$6, $5
+	xor	$1, $4
+	xor	$4, $8
+	xor	$2, $7
+	and	$6, $7
+	xor	$4, $7
+	xor	$1, $6
+	or	$5, $4
+	and	$4, $6
+	xor	$2, $6
+	and	$7, $1
+	or	$2, $1
+	xor	$1, $8
+>)
+
+define(<SBOX4I>, <
+	mov	$3, $6
+	xor	$4, $6
+	mov	$3, $7
+	or	$4, $7
+	xor	$2, $7
+	or	$4, $2
+	mov	$1, $5
+	xor	$7, $5
+	xor	$7, $4
+	and	$1, $7
+	xor	$7, $6
+	xor	$1, $7
+	or	$3, $7
+	and	$2, $1
+	mov	$1, $8
+	xor	$4, $8
+	not	$1
+	or	$6, $1
+	xor	$1, $5
+	xor	$2, $1
+	xor	$1, $7
+>)
+
+define(<SBOX5I>, <
+	mov	$1, $6
+	and	$4, $6
+	mov	$3, $8
+	xor	$6, $8
+	mov	$2, $5
+	and	$8, $5
+	mov	$1, $7
+	xor	$4, $7
+	xor	$2, $4
+	xor	$7, $5
+	and	$1, $3
+	and	$5, $1
+	or	$2, $3
+	xor	$5, $6
+	xor	$3, $6
+	mov	$5, $7
+	or	$6, $7
+	xor	$8, $7
+	xor	$4, $7
+	not	$2
+	or	$1, $2
+	xor	$2, $8
+>)
+
+define(<SBOX6I>, <
+	mov	$1, $7
+	xor	$3, $7
+	not	$3
+	mov	$2, $5
+	xor	$4, $5
+	mov	$1, $6
+	or	$3, $6
+	xor	$5, $6
+	mov	$2, $8
+	and	$7, $8
+	or	$4, $8
+	or	$3, $4
+	or	$2, $3
+	and	$1, $3
+	mov	$3, $5
+	xor	$8, $5
+	not	$5
+	and	$7, $8
+	xor	$3, $8
+	xor	$6, $1
+	xor	$1, $8
+	and	$5, $2
+	xor	$2, $7
+	xor	$4, $7
+>)
+
+define(<SBOX7I>, <
+	mov	$1, $8
+	and	$2, $8
+	mov	$2, $7
+	xor	$4, $7
+	or	$8, $7
+	mov	$1, $6
+	or	$4, $6
+	and	$3, $6	C y1 = (x0 | x3) & x2; masking with x3 again would just yield x3
+	xor	$6, $7
+	or	$3, $8
+	mov	$1, $5
+	or	$2, $5
+	and	$4, $5
+	xor	$5, $8
+	xor	$2, $5
+	mov	$4, $6
+	xor	$8, $6
+	not	$6
+	or	$5, $6
+	xor	$3, $5
+	xor	$1, $6
+	or	$6, $4
+	xor	$4, $5
+>)
+C LTI: inverse of the Serpent linear transformation, applied in place to $1 - $4; clobbers TMP32
+define(<LTI>, <
+	rol	<$>10, $3	C x2 = ror(x2, 22)
+	rol	<$>27, $1	C x0 = ror(x0, 5)
+	mov	$2, TMP32
+	shl	<$>7, TMP32
+	xor	$4, $3
+	xor	TMP32, $3	C x2 ^= x3 ^ (x1 shifted left by 7)
+	xor	$2, $1
+	xor	$4, $1	C x0 ^= x1 ^ x3
+	rol	<$>25, $4	C x3 = ror(x3, 7)
+	rol	<$>31, $2	C x1 = ror(x1, 1)
+	mov	$1, TMP32
+	shl	<$>3, TMP32
+	xor	$3, $4
+	xor	TMP32, $4	C x3 ^= x2 ^ (x0 shifted left by 3)
+	xor	$1, $2
+	xor	$3, $2	C x1 ^= x0 ^ x2
+	rol	<$>29, $3	C x2 = ror(x2, 3)
+	rol	<$>19, $1	C x0 = ror(x0, 13)
+>)
+
+	.file "serpent-decrypt.asm"
+	C In this revision the block loop only copies 16 bytes at a time from src to dst; CTX, SBOXnI and LTI are not used yet.
+	C serpent_decrypt(struct serpent_context *ctx,
+	C                 unsigned length, uint8_t *dst,
+	C                 const uint8_t *src)
+	.text
+	ALIGN(4)
+PROLOGUE(nettle_serpent_decrypt)
+        C save all registers that need to be saved: rbx is x1, rbp is x2, r12 is y3, r13 is CNT, r14 is TMP32
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	C NOTE(review): length is declared unsigned but all 64 bits of N are used below - confirm the upper half of rsi is zero on entry
+	lea	(SRC, N), SRC	C SRC and DST now point just past the end of the data
+	lea	(DST, N), DST
+	neg	N		C N = -length, a negative offset stepped up towards zero
+	jz	.Lend		C length 0: nothing to process
+
+.Lblock_loop:
+	movl	(SRC, N), x0	C load one 16-byte block as four 32-bit words
+	movl	4(SRC, N), x1
+	movl	8(SRC, N), x2
+	movl	12(SRC, N), x3
+	C NOTE(review): no subkey mixing, inverse S-box or LTI step is applied here; the words are stored back unchanged
+
+	movl	x0, (DST, N)
+	movl	x1, 4(DST, N)
+	movl	x2, 8(DST, N)
+	movl	x3, 12(DST, N)
+	add	$16, N
+	jnc	.Lblock_loop	C the add carries once the negative offset wraps to zero or above
+
+.Lend:
+	pop	%r14		C restore in reverse order of the pushes
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret	C NOTE(review): no EPILOGUE(nettle_serpent_decrypt) follows - confirm whether PROLOGUE needs a matching one
-- 
GitLab