diff --git a/x86_64/serpent-decrypt.asm b/x86_64/serpent-decrypt.asm
new file mode 100644
index 0000000000000000000000000000000000000000..6836c7bec6fd4dbc2cf6859f5cd59be641fa5258
--- /dev/null
+++ b/x86_64/serpent-decrypt.asm
@@ -0,0 +1,327 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2011 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+C MA 02111-1307, USA.
+
+C Register usage:
+
+C Single block serpent state, two copies
+define(<x0>, <%eax>)
+define(<x1>, <%ebx>)
+define(<x2>, <%ebp>)
+define(<x3>, <%r8d>)
+
+define(<y0>, <%r9d>)
+define(<y1>, <%r10d>)
+define(<y2>, <%r11d>)
+define(<y3>, <%r12d>)
+
+C Quadruple block serpent state, two copies
+define(<X0>, <%xmm0>)
+define(<X1>, <%xmm1>)
+define(<X2>, <%xmm2>)
+define(<X3>, <%xmm3>)
+
+define(<Y0>, <%xmm4>)
+define(<Y1>, <%xmm5>)
+define(<Y2>, <%xmm6>)
+define(<Y3>, <%xmm7>)
+
+define(<MINUS1>, <%xmm8>)
+define(<T0>, <%xmm9>)
+define(<T1>, <%xmm10>)
+define(<T2>, <%xmm11>)
+define(<T3>, <%xmm12>)
+
+C Arguments
+define(<CTX>, <%rdi>)
+define(<N>, <%rsi>)
+define(<DST>, <%rdx>)
+define(<SRC>, <%rcx>)
+
+define(<CNT>, <%r13>)
+define(<TMP32>, <%r14d>)
+
+C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
+
+define(<SBOX0I>, <
+	mov	$1, $5
+	xor	$3, $5
+	mov	$1, $7
+	or	$2, $7
+	mov	$3, $6
+	xor	$4, $6
+	xor	$6, $7
+	and	$3, $6
+	or	$2, $3
+	xor	$4, $2
+	or	$1, $6
+	and	$3, $2
+	xor	$2, $6
+	or	$7, $1
+	xor	$6, $1
+	mov	$7, $2
+	xor	$1, $2
+	not	$7
+	or	$7, $4
+	xor	$3, $4
+	mov	$1, $8
+	xor	$4, $8
+	or	$4, $2
+	xor	$2, $5
+>)
+
+define(<SBOX1I>, <
+	mov	$2, $6
+	or	$4, $6
+	xor	$6, $3
+	mov	$1, $8
+	xor	$2, $8
+	mov	$1, $5
+	or	$6, $5
+	and	$8, $5
+	xor	$5, $2
+	xor	$6, $8
+	and	$4, $2
+	mov	$1, $7
+	and	$3, $7
+	or	$7, $6
+	or	$4, $7
+	xor	$5, $7
+	not	$7
+	xor	$2, $6
+	xor	$6, $5
+	xor	$3, $5
+	or	$7, $1
+	xor	$1, $5
+>)
+
+define(<SBOX2I>, <
+	mov	$1, $5
+	xor	$4, $5
+	mov	$3, $7
+	xor	$4, $7
+	mov	$2, $6
+	or	$7, $6
+	xor	$6, $5
+	mov	$4, $6
+	or	$5, $6
+	and	$2, $6
+	not	$4
+	mov	$1, $8
+	or	$3, $8
+	and	$8, $7
+	xor	$7, $6
+	and	$2, $8
+	and	$3, $1
+	or	$4, $1
+	xor	$1, $8
+	and	$8, $3
+	xor	$1, $3
+	mov	$5, $7
+	xor	$6, $7
+	xor	$3, $7
+>)
+
+define(<SBOX3I>, <
+	mov	$3, $8
+	or	$4, $8
+	mov	$2, $5
+	and	$8, $5
+	mov	$1, $7
+	or	$4, $7
+	mov	$3, $6
+	xor	$7, $6
+	xor	$6, $5
+	xor	$1, $4
+	xor	$4, $8
+	xor	$2, $7
+	and	$6, $7
+	xor	$4, $7
+	xor	$1, $6
+	or	$5, $4
+	and	$4, $6
+	xor	$2, $6
+	and	$7, $1
+	or	$2, $1
+	xor	$1, $8
+>)
+
+define(<SBOX4I>, <
+	mov	$3, $6
+	xor	$4, $6
+	mov	$3, $7
+	or	$4, $7
+	xor	$2, $7
+	or	$4, $2
+	mov	$1, $5
+	xor	$7, $5
+	xor	$7, $4
+	and	$1, $7
+	xor	$7, $6
+	xor	$1, $7
+	or	$3, $7
+	and	$2, $1
+	mov	$1, $8
+	xor	$4, $8
+	not	$1
+	or	$6, $1
+	xor	$1, $5
+	xor	$2, $1
+	xor	$1, $7
+>)
+
+define(<SBOX5I>, <
+	mov	$1, $6
+	and	$4, $6
+	mov	$3, $8
+	xor	$6, $8
+	mov	$2, $5
+	and	$8, $5
+	mov	$1, $7
+	xor	$4, $7
+	xor	$2, $4
+	xor	$7, $5
+	and	$1, $3
+	and	$5, $1
+	or	$2, $3
+	xor	$5, $6
+	xor	$3, $6
+	mov	$5, $7
+	or	$6, $7
+	xor	$8, $7
+	xor	$4, $7
+	not	$2
+	or	$1, $2
+	xor	$2, $8
+>)
+
+define(<SBOX6I>, <
+	mov	$1, $7
+	xor	$3, $7
+	not	$3
+	mov	$2, $5
+	xor	$4, $5
+	mov	$1, $6
+	or	$3, $6
+	xor	$5, $6
+	mov	$2, $8
+	and	$7, $8
+	or	$4, $8
+	or	$3, $4
+	or	$2, $3
+	and	$1, $3
+	mov	$3, $5
+	xor	$8, $5
+	not	$5
+	and	$7, $8
+	xor	$3, $8
+	xor	$6, $1
+	xor	$1, $8
+	and	$5, $2
+	xor	$2, $7
+	xor	$4, $7
+>)
+
+define(<SBOX7I>, <
+	mov	$1, $8
+	and	$2, $8
+	mov	$2, $7
+	xor	$4, $7
+	or	$8, $7
+	mov	$1, $6
+	or	$4, $6
+	and	$4, $6
+	xor	$6, $7
+	or	$3, $8
+	mov	$1, $5
+	or	$2, $5
+	and	$4, $5
+	xor	$5, $8
+	xor	$2, $5
+	mov	$4, $6
+	xor	$8, $6
+	not	$6
+	or	$5, $6
+	xor	$3, $5
+	xor	$1, $6
+	or	$6, $4
+	xor	$4, $5
+>)
+
+C Inverse linear transformation; rotate counts are the inverse of the
+C encrypt-side LT (rol 10 == ror 22, rol 27 == ror 5, etc.)
+define(<LTI>, <
+	rol	<$>10, $3
+	rol	<$>27, $1
+	mov	$2, TMP32
+	shl	<$>7, TMP32
+	xor	$4, $3
+	xor	TMP32, $3
+	xor	$2, $1
+	xor	$4, $1
+	rol	<$>25, $4
+	rol	<$>31, $2
+	mov	$1, TMP32
+	shl	<$>3, TMP32
+	xor	$3, $4
+	xor	TMP32, $4
+	xor	$1, $2
+	xor	$3, $2
+	rol	<$>29, $3
+	rol	<$>19, $1
+>)
+
+	.file "serpent-decrypt.asm"
+	
+	C serpent_decrypt(struct serpent_context *ctx,
+	C                 unsigned length, uint8_t *dst,
+	C                 const uint8_t *src)
+	.text
+	ALIGN(4)
+PROLOGUE(nettle_serpent_decrypt)
+	C save all registers that need to be saved
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+
+	lea	(SRC, N), SRC
+	lea	(DST, N), DST
+	neg	N
+	jz	.Lend
+
+.Lblock_loop:
+	movl	(SRC, N), x0
+	movl	4(SRC, N), x1
+	movl	8(SRC, N), x2
+	movl	12(SRC, N), x3
+
+	C NOTE(review): the decryption rounds (subkey mixing, SBOX*I
+	C applications, LTI) are missing here in the captured patch —
+	C the hunk header claims 327 lines but the round body was
+	C truncated. TODO: restore the rounds before applying this diff.
+
+	movl	x0, (DST, N)
+	movl	x1, 4(DST, N)
+	movl	x2, 8(DST, N)
+	movl	x3, 12(DST, N)
+	add	$16, N
+	jnc	.Lblock_loop
+
+.Lend:
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret