1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // +build arm,!gccgo,!appengine,!nacl
9 // This code was translated into a form compatible with 5a from the public
10 // domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
// poly1305_init_constants_armv6 is a file-local (<>) table of five 32-bit
// words laid out back to back at offsets 0x00..0x10, 20 bytes in total.
// Its address is loaded into R7 by poly1305_init_ext_armv6.
// NOTE(review): the values look like per-limb masks applied to the key while
// it is split into 26-bit pieces -- confirm against the reference code.
12 DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
13 DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
14 DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
15 DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
16 DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
// Flag 8 is RODATA in Go's textflag.h; $20 is the symbol size in bytes
// (5 words * 4 bytes), matching the five DATA directives above.
17 GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
19 // Warning: the linker may use R11 to synthesize certain instructions. Please
20 // take care and verify that no synthetic instructions use it.
// poly1305_init_ext_armv6 is a file-local (<>) NOSPLIT routine with a declared
// frame size of 0. It is reached by BL from poly1305_auth_armv6, which sets
// R0 to R13+136 (the context structure) before the call.
// It reads 32-bit words through R1 and stores groups of five words through
// R0; both pointers are advanced by the write-back (.W) forms used below.
// NOTE(review): R1 is presumably the key pointer -- confirm against the caller.
22 TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
23 // Needs 16 bytes of stack and 64 bytes of space pointed to by R0. (It
24 // might look like it's only 60 bytes of space but the final four bytes
25 // will be written by another function.) We need to skip over four
26 // bytes of stack because that's saving the value of 'g'.
29 MOVM.IA.W (R1), [R2-R5] // load four words from (R1) into R2-R5; R1 += 16
30 MOVW $·poly1305_init_constants_armv6<>(SB), R7 // R7 = address of the constant table
45 MOVM.IA.W [R2-R6], (R0) // store five words R2-R6 at (R0); R0 += 20
51 MOVM.IA.W [R2-R6], (R0) // store a second group of five words; R0 += 20
52 MOVM.IA.W (R1), [R2-R5] // load the next four words from (R1); R1 += 16
// MOVW_UNALIGNED copies the four bytes at offset..offset+3 from the address in
// Rsrc to the same offsets from the address in Rdst, one byte at a time
// through Rtmp. Only byte loads and stores are issued, so neither address
// needs word alignment. Rsrc and Rdst are left unchanged; Rtmp is clobbered
// and ends up holding the last byte copied.
58 #define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
59 MOVBU (offset+0)(Rsrc), Rtmp; \
60 MOVBU Rtmp, (offset+0)(Rdst); \
61 MOVBU (offset+1)(Rsrc), Rtmp; \
62 MOVBU Rtmp, (offset+1)(Rdst); \
63 MOVBU (offset+2)(Rsrc), Rtmp; \
64 MOVBU Rtmp, (offset+2)(Rdst); \
65 MOVBU (offset+3)(Rsrc), Rtmp; \
66 MOVBU Rtmp, (offset+3)(Rdst)
// poly1305_blocks_armv6 is a file-local (<>) NOSPLIT routine with a declared
// frame size of 0; it is reached by BL from poly1305_auth_armv6.
// Throughout this routine `g` names a general register (R10 in Go's ARM
// assembler) that is used as ordinary scratch, both as a pointer and as the
// low half of a 64-bit accumulator pair.
// The recurring mask 0xfc000000 clears the top six bits of a word, leaving a
// 26-bit value.
68 TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
69 // Needs 24 bytes of stack for saved registers and then 88 bytes of
70 // scratch space after that. We assume that 24 bytes at (R13) have
71 // already been used: four bytes for the link register saved in the
72 // prelude of poly1305_auth_armv6, four bytes for saving the value of g
73 // in that function and 16 bytes of scratch space used around
74 // poly1305_finish_ext_armv6_skip1.
76 MOVM.IB [R4-R8, R14], (R12) // store six registers (24 bytes) starting at R12+4; R12 itself is not updated
83 WORD $0xe1180008 // TST R8, R8 not working see issue 5921
91 BLO poly1305_blocks_armv6_done // branch taken on unsigned "lower"
// Each iteration starts by checking whether the address in R14 is 4-byte
// aligned (the low two bits are tested via a hand-encoded TST).
93 poly1305_blocks_armv6_mainloop:
94 WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
95 BEQ poly1305_blocks_armv6_mainloop_aligned
// Unaligned path: copy 16 bytes from (R14) to the memory addressed by g,
// byte by byte, using R0 as the temporary.
97 MOVW_UNALIGNED(R14, g, R0, 0)
98 MOVW_UNALIGNED(R14, g, R0, 4)
99 MOVW_UNALIGNED(R14, g, R0, 8)
100 MOVW_UNALIGNED(R14, g, R0, 12)
103 B poly1305_blocks_armv6_mainloop_loaded
// Aligned path: a single load-multiple fetches the four words directly.
105 poly1305_blocks_armv6_mainloop_aligned:
106 MOVM.IA.W (R14), [R0-R3] // load four words into R0-R3; R14 += 16
108 poly1305_blocks_armv6_mainloop_loaded:
// Reduce four registers to 26 bits each.
117 BIC $0xfc000000, R0, R0
118 BIC $0xfc000000, g, g
120 BIC $0xfc000000, R11, R11
121 BIC $0xfc000000, R12, R12
129 MOVM.IA (R14), [R0-R4] // load five words from (R14) into R0-R4, no write-back
// Multiply stage. MULLU starts a 64-bit unsigned product in a register pair;
// MULALU adds a further 64-bit unsigned product into the same pair. Two
// accumulator pairs, (R11, g) and (R14, R12), are interleaved; each receives
// five products, pairing each of R0-R4 exactly once with each of R5-R9.
130 MULLU R4, R5, (R11, g)
131 MULLU R3, R5, (R14, R12)
132 MULALU R3, R6, (R11, g)
133 MULALU R2, R6, (R14, R12)
134 MULALU R2, R7, (R11, g)
135 MULALU R1, R7, (R14, R12)
138 MULALU R1, R8, (R11, g)
139 MULALU R0, R8, (R14, R12)
140 MULALU R0, R9, (R11, g)
141 MULALU R4, R9, (R14, R12)
// Second pair of interleaved five-product accumulations, with the operand
// pairing rotated relative to the group above.
146 MULLU R2, R5, (R11, g)
147 MULLU R1, R5, (R14, R12)
148 MULALU R1, R6, (R11, g)
149 MULALU R0, R6, (R14, R12)
150 MULALU R0, R7, (R11, g)
151 MULALU R4, R7, (R14, R12)
154 MULALU R4, R8, (R11, g)
155 MULALU R3, R8, (R14, R12)
156 MULALU R3, R9, (R11, g)
157 MULALU R2, R9, (R14, R12)
// Final five-product accumulation; only the (R11, g) pair is used here.
162 MULLU R0, R5, (R11, g)
163 MULALU R4, R6, (R11, g)
164 MULALU R3, R7, (R11, g)
165 MULALU R2, R8, (R11, g)
166 MULALU R1, R9, (R11, g)
168 MOVM.IA (R0), [R0-R7] // load eight words from (R0); R0 itself is overwritten by the first word
// Mask intermediate values back to 26 bits.
173 BIC $0xfc000000, g, g
174 BIC $0xfc000000, R4, R4
183 BIC $0xfc000000, R0, R0
184 BIC $0xfc000000, R6, R6
// Three-operand form: the masked value goes to a different destination
// register and the source register is left untouched.
192 BIC $0xfc000000, g, R5
193 BIC $0xfc000000, R2, R7
197 BIC $0xfc000000, R4, R8
205 BHS poly1305_blocks_armv6_mainloop // loop again on unsigned "higher or same"
207 poly1305_blocks_armv6_done:
215 MOVM.DA (R0), [R4-R8, R14] // reload the same six registers that were stored on entry (decrement-after from R0)
// MOVHUP_UNALIGNED copies two bytes from the address in Rsrc to the address in
// Rdst, one byte at a time through Rtmp. The .P suffix selects post-increment
// addressing, so both Rsrc and Rdst are advanced by 2 when the macro
// finishes. Only byte accesses are issued, so no alignment is required.
// Rtmp is clobbered.
218 #define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
219 MOVBU.P 1(Rsrc), Rtmp; \
220 MOVBU.P Rtmp, 1(Rdst); \
221 MOVBU.P 1(Rsrc), Rtmp; \
222 MOVBU.P Rtmp, 1(Rdst)
// MOVWP_UNALIGNED copies four bytes from the address in Rsrc to the address in
// Rdst by expanding MOVHUP_UNALIGNED twice. Both pointers are therefore
// advanced by 4 and Rtmp is clobbered.
224 #define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
225 MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
226 MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
228 // func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
229 TEXT ·poly1305_auth_armv6(SB), $196-16
230 // The value 196, just above, is the sum of 64 (the size of the context
231 // structure) and 132 (the amount of stack needed).
233 // At this point, the stack pointer (R13) has been moved down. It
234 // points to the saved link register and there's 196 bytes of free
237 // The stack for this function looks like:
239 // +---------------------
241 // | 64 bytes of context structure
243 // +---------------------
245 // | 112 bytes for poly1305_blocks_armv6
247 // +---------------------
248 // | 16 bytes of final block, constructed at
249 // | poly1305_finish_ext_armv6_skip8
250 // +---------------------
251 // | four bytes of saved 'g'
252 // +---------------------
253 // | lr, saved by prelude <- R13 points here
254 // +---------------------
262 ADD $136, R13, R0 // 136 = 4 + 4 + 16 + 112
265 // poly1305_init_ext_armv6 will write to the stack from R13+4, but
266 // that's ok because none of the other values have been written yet.
267 BL poly1305_init_ext_armv6<>(SB)
269 BEQ poly1305_auth_armv6_noblocks
274 BL poly1305_blocks_armv6<>(SB)
276 poly1305_auth_armv6_noblocks:
287 BEQ poly1305_finish_ext_armv6_noremaining
289 ADD $8, R13, R9 // 8 = offset to 16 byte scratch space
294 WORD $0xe3110003 // TST R1, #3 not working see issue 5921
295 BEQ poly1305_finish_ext_armv6_aligned
296 WORD $0xe3120008 // TST R2, #8 not working see issue 5921
297 BEQ poly1305_finish_ext_armv6_skip8
298 MOVWP_UNALIGNED(R1, R9, g)
299 MOVWP_UNALIGNED(R1, R9, g)
301 poly1305_finish_ext_armv6_skip8:
302 WORD $0xe3120004 // TST $4, R2 not working see issue 5921
303 BEQ poly1305_finish_ext_armv6_skip4
304 MOVWP_UNALIGNED(R1, R9, g)
306 poly1305_finish_ext_armv6_skip4:
307 WORD $0xe3120002 // TST $2, R2 not working see issue 5921
308 BEQ poly1305_finish_ext_armv6_skip2
309 MOVHUP_UNALIGNED(R1, R9, g)
310 B poly1305_finish_ext_armv6_skip2
312 poly1305_finish_ext_armv6_aligned:
313 WORD $0xe3120008 // TST R2, #8 not working see issue 5921
314 BEQ poly1305_finish_ext_armv6_skip8_aligned
315 MOVM.IA.W (R1), [g-R11]
316 MOVM.IA.W [g-R11], (R9)
318 poly1305_finish_ext_armv6_skip8_aligned:
319 WORD $0xe3120004 // TST $4, R2 not working see issue 5921
320 BEQ poly1305_finish_ext_armv6_skip4_aligned
324 poly1305_finish_ext_armv6_skip4_aligned:
325 WORD $0xe3120002 // TST $2, R2 not working see issue 5921
326 BEQ poly1305_finish_ext_armv6_skip2
330 poly1305_finish_ext_armv6_skip2:
331 WORD $0xe3120001 // TST $1, R2 not working see issue 5921
332 BEQ poly1305_finish_ext_armv6_skip1
336 poly1305_finish_ext_armv6_skip1:
343 BL poly1305_blocks_armv6<>(SB)
345 poly1305_finish_ext_armv6_noremaining:
352 BIC $0xfc000000, R4, R4
356 BIC $0xfc000000, R0, R0
359 BIC $0xfc000000, R1, R1
362 BIC $0xfc000000, R2, R2
365 BIC $0xfc000000, R3, R3
369 BIC $0xfc000000, R6, R6
372 BIC $0xfc000000, R7, R7
375 BIC $0xfc000000, g, g
378 ADD R11>>26, R12, R12
379 BIC $0xfc000000, R11, R11
414 MOVM.IA [R0-R3], (R8)
424 MOVM.IA.W [R0-R7], (R12)
425 MOVM.IA [R0-R7], (R12)