Skip to content
Snippets Groups Projects
Commit 658334ab authored by Maamoun TK's avatar Maamoun TK
Browse files

Correct comments in x86_64/poly1305.m4

parent d1b47028
No related branches found
No related tags found
No related merge requests found
......@@ -224,7 +224,7 @@ PROLOGUE(_nettle_poly1305_blocks)
lea .Lmask26(%rip), %rax
vmovdqa 0(%rax), MASK26
cmp $POLY1305_BLOCK_THRESHOLD, BLOCKS
cmp $POLY1305_BLOCK_THRESHOLD_AVX2, BLOCKS
jb L1B
vmovq P1305_R0 (CTX), XMM(C0) C R0
......
C Threshold of processing multiple blocks in parallel
C of a multiple of 4 with a minimum 8 blocks
define(`POLY1305_BLOCK_THRESHOLD', `32')
C CTX is the address of context where key and pre-computed values are stored
C DATA is the address of input block
C PADBYTE is padding byte for input block
C GPR0 is the starting register of sequential general-purpose registers
C used in the macro of following layout
C GPR0, GPR1, GPR2 are inputs representing the previous state radix 2^64
C GPR3, GPR4 are temporary registers
C VR0 is the starting register of sequential vector registers used in
C the macro of following layout
C VR0, VR1 are outputs representing the result state radix 2^64 sorted as follows
C (low 64-bit of VR0) + (low 64-bit of VR1) + (high 64-bit of VR1)
C VR2..VR12 are temporary registers
C BLOCK_R64(CTX, PAD, H0, H1, H2, T0, T1, F0, F1)
C using AVX2 code of a multiple of 4 with a minimum 8 blocks
define(`POLY1305_BLOCK_THRESHOLD_AVX2', `32')
C CTX contains key and pre-computed values
C Inputs T0, T1, T2 expect sum of previous hash with padded input
C message in radix 2^64
C Output hash results of radix 2^64 are stored in H0, H1, T2 respectively
C F0, F1 are temporary general registers
C BLOCK_R64(CTX, H0, H1, T0, T1, T2, F0, F1)
define(`BLOCK_R64', `
mov P1305_R1 ($1), %rax
mul $4 C R1*$4
mul $4 C R1*T0
mov %rax, $7
mov %rdx, $8
mov $4, %rax C Last use of $4 input
mov $4, %rax C Last use of T0 input
mov P1305_R0 ($1), $4
mul $4 C R0*$4
mul $4 C R0*T0
mov %rax, $2
mov %rdx, $3
mov $5, %rax
mul $4 C R0*$5
mul $4 C R0*T1
add %rax, $7
adc %rdx, $8
mov P1305_S1 ($1), $4
mov $5, %rax C Last use of $5 input
mul $4 C S1*$5
mov $5, %rax C Last use of T1 input
mul $4 C S1*T1
add %rax, $2
adc %rdx, $3
mov $6, %rax
mul $4 C S1*$6
mul $4 C S1*T2
add %rax, $7
adc %rdx, $8
......@@ -48,11 +41,11 @@ define(`BLOCK_R64', `
shr `$'2, $4
mov P1305_S0 ($1), %rax
mul $4 C S0*($6 >> 2)
mul $4 C S0*(T2 >> 2)
add %rax, $2
adc %rdx, $3
imul P1305_R0 ($1), $6 C R0*($6 & 3)
imul P1305_R0 ($1), $6 C R0*(T2 & 3)
add $7, $3
adc $8, $6
')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment