C poly1305-internal.asm
C
C nettle, low-level cryptographics library
C 
C Copyright (C) 2013 Niels Möller
C
C The nettle library is free software; you can redistribute it and/or modify
C it under the terms of the GNU Lesser General Public License as published by
C the Free Software Foundation; either version 2.1 of the License, or (at your
C option) any later version.
C 
C The nettle library is distributed in the hope that it will be useful, but
C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
C License for more details.
C 
C You should have received a copy of the GNU Lesser General Public License
C along with the nettle library; see the file COPYING.LIB.  If not, write to
C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
C MA 02111-1301, USA.

	.file "poly1305-internal.asm"

C Registers mainly used by poly1305_block
C
C CTX is the context pointer (first argument). In poly1305_block, T0, T1
C and T2 hold the three words of the value being multiplied, and H0, H1
C and H2 collect the partial products.
C
C T1 is %rsi, the register that also carries the second argument on
C entry, so code that still needs that pointer must not write T1 first.
C poly1305_digest redefines T1 as %rax for that reason.
define(<CTX>, <%rdi>)
define(<T0>, <%rcx>)
define(<T1>, <%rsi>)
define(<T2>, <%r8>)
define(<H0>, <%r9>)
define(<H1>, <%r10>)
define(<H2>, <%r11>)
	
	C poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
	C
	C Reads the 16-byte key as two 64-bit words, masks each word, and
	C stores them in the context as r0 and r1. Also stores
	C s1 = 5 * (r1 >> 2), and clears the state words h0, h1 and h2.
	.text
	C Registers:
	C  %rdi: ctx
	C  %rsi: key
	C  %r8: mask
	C  %rax: scratch
	ALIGN(16)
PROLOGUE(nettle_poly1305_set_key)
	W64_ENTRY(2,0)
	C Mask for the low key word.
	mov	$0x0ffffffc0fffffff, %r8
	mov	(%rsi), %rax
	and	%r8, %rax
	C Clearing the two low bits of the mask gives the mask for the
	C high key word, 0x0ffffffc0ffffffc.
	and	$-4, %r8
	C Store r0 via P1305_R0, the offset _nettle_poly1305_block loads it from.
	mov	%rax, P1305_R0 (CTX)
	mov	8(%rsi), %rax
	and	%r8, %rax
	mov	%rax, P1305_R1 (CTX)
	C The two low bits of r1 are zero after masking, so the shift is
	C exact and s1 = 5/4 r1.
	shr	$2, %rax
	imul	$5, %rax
	mov	%rax, P1305_S1 (CTX)
	C Clear the state. h2 is written through XREG, unlike h0 and h1.
	xor	XREG(%rax), XREG(%rax)
	mov	%rax, P1305_H0 (CTX)
	mov	%rax, P1305_H1 (CTX)
	mov	XREG(%rax), P1305_H2 (CTX)
	
	W64_EXIT(2,0)
	ret

EPILOGUE(nettle_poly1305_set_key)

C 64-bit multiplication mod 2^130 - 5
C
C With B = 2^64,
C
C (x_0 + B x_1 + B^2 x_2) * (r_0 + B r_1) =
C     1   B B^2 B^3
C   x_0 r_0
C       x_0 r_1
C       x_1 r_0
C           x_1 r_1
C           x_2 r_0
C               x_2 r_1
C Then r_1 B^2 = r_1/4 (2^130) = 5/4 r_1 (mod 2^130 - 5),
C and  r_1 B^3 = 5/4 B r_1.
C So we get
C
C  x_0 r_0 + x_1 (5/4 r_1) + B (x_0 r_1 + x_1 r_0 + x_2 5/4 r_1 + B x_2 r_0)
C
C or, writing r'_1 for 5/4 r_1,
C
C     1   B B^2 B^3
C   x_0 r_0
C   x_1 r'_1
C       x_0 r_1
C       x_1 r_0
C       x_2 r'_1
C           x_2 r_0

	C _poly1305_block (struct poly1305_ctx *ctx, const uint8_t m[16], unsigned hi)
	C
	C Computes h <-- (h + m + hi B^2) * r, partially reduced mod
	C 2^130 - 5, where B = 2^64, r = r_0 + B r_1, and h is the
	C three-word state h0, h1, h2 kept in the context.
	C
	C Arguments:
	C   %rdi: ctx
	C   %rsi: m
	C   %rdx: hi
	
PROLOGUE(_nettle_poly1305_block)
	W64_ENTRY(3, 0)
	C T1 is %rsi, the message pointer, so the low word is loaded
	C first. hi is copied out of %rdx before the first mul, which
	C overwrites %rdx.
	mov	(%rsi), T0
	mov	8(%rsi), T1
	mov	XREG(%rdx),	XREG(T2)

	C Registers:
	C Inputs:  CTX, T0, T1, T2,
	C Outputs: the new h, stored into the context from H0, H1 and T2.

	C x = h + m, held as the three words T0, T1, T2.
	add	P1305_H0 (CTX), T0
	adc	P1305_H1 (CTX), T1
	adc	P1305_H2 (CTX), XREG(T2)

	C Column 1: H1:H0 = x0*r0 + x1*r1'.
	mov	P1305_R0 (CTX), %rax
	mul	T0			C x0*r0
	mov	%rax, H0
	mov	%rdx, H1
	mov	P1305_S1 (CTX), %rax	C 5/4 r1
	mov	%rax, H2
	mul	T1			C x1*r1'
	C imul keeps only the low 64 bits of each product.
	imul	T2, H2			C x2*r1'
	imul	P1305_R0 (CTX), T2	C x2*r0
	add	%rax, H0
	adc	%rdx, H1

	C Column B: T2:H2 = x2*r1' + B x2*r0 + x1*r0 + x0*r1.
	mov	P1305_R0 (CTX), %rax
	mul	T1			C x1*r0
	add	%rax, H2
	adc	%rdx, T2
	mov	P1305_R1 (CTX), %rax
	mul	T0			C x0*r1
	add	%rax, H2
	adc	%rdx, T2

	C T2 has weight B^2 = 2^128, so its bits from bit 2 upwards have
	C weight 2^130 or more. Fold them back into the low word
	C multiplied by 5, since 2^130 = 5 (mod 2^130 - 5), and keep
	C only the two low bits in T2.
	mov	T2, %rax
	shr	$2, %rax
	imul	$5, %rax
	and	$3, XREG(T2)
	C Combine the two columns, propagating carries up into T2.
	add	%rax, H0
	adc	H2, H1
	adc	$0, XREG(T2)
	mov	H0, P1305_H0 (CTX)
	mov	H1, P1305_H1 (CTX)
	mov	XREG(T2), P1305_H2 (CTX)
	W64_EXIT(3, 0)
	ret
EPILOGUE(_nettle_poly1305_block)
Niels Möller's avatar
Niels Möller committed
131

132
	C poly1305_digest (struct poly1305_ctx *ctx, uint8_t *s)
Niels Möller's avatar
Niels Möller committed
133
134
	C Registers:
	C   %rdi: ctx
135
	C   %rsi: s
Niels Möller's avatar
Niels Möller committed
136
137
	
PROLOGUE(nettle_poly1305_digest)
138
	W64_ENTRY(2, 0)
Niels Möller's avatar
Niels Möller committed
139
140
141
142
143
144
145
146
147
148
149
150

	mov	P1305_H0 (CTX), H0
	mov	P1305_H1 (CTX), H1
	mov	P1305_H2 (CTX), XREG(H2)
	mov	XREG(H2), XREG(%rax)
	shr	$2, XREG(%rax)
	and	$3, H2
	imul	$5, XREG(%rax)
	add	%rax, H0
	adc	$0, H1
	adc	$0, XREG(H2)

151
152
C Use %rax instead of %rsi
define(<T1>, <%rax>)
Niels Möller's avatar
Niels Möller committed
153
154
155
156
157
158
159
160
161
162
	C Add 5, use result if >= 2^130
	mov	$5, T0
	xor	T1, T1
	add	H0, T0
	adc	H1, T1
	adc	$0, XREG(H2)
	cmp	$4, XREG(H2)
	cmovnc	T0, H0
	cmovnc	T1, H1

163
164
	add	H0, (%rsi)
	adc	H1, 8(%rsi)
Niels Möller's avatar
Niels Möller committed
165
166
167
168
169

	xor	XREG(%rax), XREG(%rax)
	mov	%rax, P1305_H0 (CTX)
	mov	%rax, P1305_H1 (CTX)
	mov	XREG(%rax), P1305_H2 (CTX)
170
	W64_EXIT(2, 0)
Niels Möller's avatar
Niels Möller committed
171
172
	ret