diff --git a/x86_64/camellia-crypt-internal.asm b/x86_64/camellia-crypt-internal.asm new file mode 100644 index 0000000000000000000000000000000000000000..19c21ea131194f02d3d972e09716c96b0583c29b --- /dev/null +++ b/x86_64/camellia-crypt-internal.asm @@ -0,0 +1,174 @@ +C -*- mode: asm; asm-comment-char: ?C; -*- +C nettle, low-level cryptographics library +C +C Copyright (C) 2010, Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +C MA 02111-1307, USA. 
C Register usage:

C Function arguments, in the order of the C prototype given further down
define(<CTX>, <%rdi>)
define(<TABLE>, <%rsi>)
define(<LENGTH>, <%rdx>)
define(<DST>, <%rcx>)
define(<SRC>, <%r8>)

C Camellia state
C I0 and I1 hold the two 64-bit halves of the current block.
C TMP, IL and IR are scratch registers used by the macros below.
define(<I0>, <%rax>)
define(<I1>, <%rbx>)	C callee-save
define(<KEY>, <%r9>)
define(<TMP>, <%rbp>)	C callee-save
define(<CNT>, <%r10>)
define(<IL>, <%r11>)
define(<IR>, <%r12>)	C callee-save

C Addressing of the four lookup tables. Each macro takes the index
C register as its argument and scales it by 4, so every table holds
C 32-bit entries and successive tables start 1024 bytes apart.
define(<SP1110>, <(TABLE,$1,4)>)
define(<SP0222>, <1024(TABLE,$1,4)>)
define(<SP3033>, <2048(TABLE,$1,4)>)
define(<SP4404>, <3072(TABLE,$1,4)>)

C ROUND(x, y, key-offset)
C
C One Feistel round. Every byte of x is used as an index into one of
C the four tables; the lookups for the four low bytes are xored into
C IR and those for the four high bytes into IL. The two words are then
C mixed and merged into one 64-bit value, which is xored into y
C together with the subkey at key-offset(KEY).
C
C x is rotated by 16 bits four times, so it is unchanged on exit.
C Clobbers TMP, IL and IR.
define(<ROUND>, <
	C Low four bytes of x go into IR
	movzbl	LREG($1), XREG(TMP)
	movl	SP1110(TMP), XREG(IR)
	movzbl	HREG($1), XREG(TMP)
	xorl	SP4404(TMP), XREG(IR)
	ror	<$>16, $1

	movzbl	LREG($1), XREG(TMP)
	xorl	SP3033(TMP), XREG(IR)
	movzbl	HREG($1), XREG(TMP)
	xorl	SP0222(TMP), XREG(IR)
	ror	<$>16, $1

	C High four bytes of x go into IL
	movzbl	LREG($1), XREG(TMP)
	movl	SP4404(TMP), XREG(IL)
	movzbl	HREG($1), XREG(TMP)
	xorl	SP3033(TMP), XREG(IL)
	ror	<$>16, $1

	movzbl	LREG($1), XREG(TMP)
	xorl	SP0222(TMP), XREG(IL)
	movzbl	HREG($1), XREG(TMP)
	xorl	SP1110(TMP), XREG(IL)
	ror	<$>16, $1

	C Mix the two 32-bit words, then pack them with IR in the
	C upper half and IL in the lower half. The 32-bit operations
	C above leave the upper half of IL zero, so a plain or works.
	xorl	XREG(IL), XREG(IR)
	rorl	<$>8, XREG(IL)
	xorl	XREG(IR), XREG(IL)
	shl	<$>32, IR
	or	IL, IR
	xor	$3(KEY), $2
	xor	IR, $2
>)

C FL(x, key-offset)
C
C With x viewed as an upper and a lower 32-bit word, and the 64-bit
C subkey at key-offset(KEY) viewed the same way:
C   lower word of x ^= rotate-left-by-1 (upper word of x & upper key word)
C   upper word of x ^= (lower word of x | lower key word)
C Clobbers TMP.
define(<FL>, <
	mov	$1, TMP
	shr	<$>32, TMP
	andl	$2 + 4(KEY), XREG(TMP)
	roll	<$>1, XREG(TMP)
	C The 64-bit xor is deliberate. The 32-bit form would clear the
	C upper half of x. The upper half of TMP is zero after the
	C 32-bit rotate, so only the lower word of x is changed.
	xor	TMP, $1
	movl	$2(KEY), XREG(TMP)
	orl	XREG($1), XREG(TMP)
	shl	<$>32, TMP
	xor	TMP, $1
>)

C FLINV(x, key-offset)
C
C The two steps of FL applied in the opposite order:
C   upper word of x ^= (lower word of x | lower key word)
C   lower word of x ^= rotate-left-by-1 (upper word of x & upper key word)
C Clobbers TMP.
define(<FLINV>, <
	movl	$2(KEY), XREG(TMP)
	orl	XREG($1), XREG(TMP)
	shl	<$>32, TMP
	xor	TMP, $1
	mov	$1, TMP
	shr	<$>32, TMP
	andl	$2 + 4(KEY), XREG(TMP)
	roll	<$>1, XREG(TMP)
	C The 64-bit xor is deliberate. The 32-bit form would clear the
	C upper half of x. The upper half of TMP is zero after the
	C 32-bit rotate, so only the lower word of x is changed.
	xor	TMP, $1
>)

	.file "camellia-crypt-internal.asm"

	C _camellia_crypt(struct camellia_context *ctx,
	C		  const struct camellia_table *T,
	C		  unsigned length, uint8_t *dst,
	C		  uint8_t *src)
	C
	C Processes length bytes from src to dst, 16 bytes per block.
	C Returns at once if length is zero. The 32-bit word at offset 0
	C of ctx is loaded as the loop count (presumably the number of
	C subkeys; confirm against the context struct), and the 64-bit
	C subkeys are read starting at offset 8 of ctx.
	.text
	ALIGN(4)
PROLOGUE(_nettle_camellia_crypt)

	C length is a 32-bit argument, and the ABI leaves the upper
	C half of its register undefined. Zero-extend it once so the
	C 64-bit test and sub on LENGTH below are well defined.
	movl	XREG(LENGTH), XREG(LENGTH)
	test	LENGTH, LENGTH
	jz	.Lend

	push	%rbx
	push	%rbp
	push	%r12

.Lblock_loop:
	C Load data, note that we'll happily do unaligned loads
	mov	(SRC), I0
	bswap	I0
	mov	8(SRC), I1
	bswap	I1
	add	$16, SRC

	C Restart at the beginning of the key schedule for each block.
	C The 32-bit load zero-extends into the full CNT register.
	mov	CTX, KEY
	movl	(KEY), XREG(CNT)
	sub	$8, CNT

	C Whitening using first subkey
	xor	8(KEY), I0
	add	$16, KEY

	C First group of six rounds, without a preceding FL/FLINV layer
	ROUND(I0, I1, 0)
	ROUND(I1, I0, 8)
	ROUND(I0, I1, 16)
	ROUND(I1, I0, 24)
	ROUND(I0, I1, 32)
	ROUND(I1, I0, 40)

.Lround_loop:
	C Each pass consumes eight subkeys: two for the FL/FLINV layer,
	C addressed with negative offsets after KEY has been advanced,
	C and six for the rounds.
	add	$64, KEY
	FL(I0, -16)
	FLINV(I1, -8)
	ROUND(I0, I1, 0)
	ROUND(I1, I0, 8)
	ROUND(I0, I1, 16)
	ROUND(I1, I0, 24)
	ROUND(I0, I1, 32)
	ROUND(I1, I0, 40)

	sub	$8, CNT
	ja	.Lround_loop

	C Final whitening of I1, then store the two halves swapped
	bswap	I0
	mov	I0, 8(DST)
	xor	48(KEY), I1
	bswap	I1
	mov	I1, (DST)
	add	$16, DST
	sub	$16, LENGTH

	ja	.Lblock_loop

	pop	%r12
	pop	%rbp
	pop	%rbx
.Lend:
	ret
EPILOGUE(_nettle_camellia_crypt)