/* sha1-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * Author:  Daniel Otte
 *
 * License: GPLv3 or later
 */
; SHA1 implementation in assembler for AVR
/* Size of one SHA-1 message block and of the resulting digest, in bits. */
SHA1_BLOCK_BITS = 512
SHA1_HASH_BITS  = 160

/*
 * precall - save registers before calling out to a (C) helper.
 * Pushes r0, r1, r18-r27 and r30-r31 and then clears r1, so the callee
 * finds r1 == 0. Must be paired with "postcall", which pops in the exact
 * reverse order.
 */
.macro precall
	push r0
	push r1
	push r18
	push r19
	push r20
	push r21
	push r22
	push r23
	push r24
	push r25
	push r26
	push r27
	push r30
	push r31
	clr r1
.endm

/*
 * postcall - restore the registers saved by "precall".
 * Pops r31-r30, r27-r18, r1 and r0, i.e. the reverse of the push order
 * used by precall.
 */
.macro postcall
	pop r31
	pop r30
	pop r27
	pop r26
	pop r25
	pop r24
	pop r23
	pop r22
	pop r21
	pop r20
	pop r19
	pop r18
	pop r1
	pop r0
.endm


/*
 * hexdump length - debug helper: print CR LF, then dump "length" bytes
 * starting at the address in X (r27:r26).
 *
 * The dump is emitted in pieces of at most 16 bytes; for length > 16 the
 * macro expands itself again with length-16 after advancing X by 16.
 * X is preserved around the external calls; r22-r25 are overwritten.
 * NOTE(review): uart_putc and uart_hexdump are defined elsewhere;
 * presumably uart_putc takes the character in r24 and uart_hexdump takes
 * the pointer in r25:r24 and the byte count in r23:r22 - confirm.
 */
.macro hexdump length
	push r27
	push r26
	ldi r25, '\r'
	mov r24, r25
	call uart_putc
	ldi r25, '\n'
	mov r24, r25
	call uart_putc
	pop r26
	pop r27
	movw r24, r26			/* pointer argument = X */
.if \length > 16
	ldi r22, lo8(16)
	ldi r23, hi8(16)
	push r27
	push r26
	call uart_hexdump
	pop r26
	pop r27
	adiw r26, 16			/* advance to the next 16-byte piece */
	hexdump \length-16
.else
	ldi r22, lo8(\length)
	ldi r23, hi8(\length)
	call uart_hexdump
.endif
.endm

/*
 * delay - debug busy-wait. The body is commented out, so the macro
 * currently expands to nothing; the disabled code is a nested
 * decrement loop over r0 and r1.
 */
.macro delay
/*
	push r0
	push r1
	clr r0
1:	clr r1
2:	dec r1
	brne 2b
	dec r0
	brne 1b
	pop r1
	pop r0  // */
.endm

/*
 * dbg_hexdump length - register-preserving wrapper around "hexdump".
 * X points to the block to dump. The body is commented out, so the macro
 * currently expands to nothing; remove the comment markers to enable the
 * debug output.
 */
.macro dbg_hexdump length
/*
	precall
	hexdump \length
	postcall
	// */
.endm



.section .text

/* I/O addresses used with in/out below: stack pointer low/high byte and
   the status register. */
SPL = 0x3D
SPH = 0x3E
SREG = 0x3F


;
; sha1_ctx_t is:
;
;   [h0][h1][h2][h3][h4][length]
;   each hn is 32 bits wide, length is 64 bits wide
;   (28 bytes in total; length counts the message bits hashed so far)

;###########################################################

.global sha1_ctx2hash
; === sha1_ctx2hash ===
; this function converts a state into a normal hash (bytestring)
;  param1: the 16-bit destination pointer
;          given in r25,r24 (r25 is most significant)
;  param2: the 16-bit pointer to sha1_ctx structure
;          given in r23,r22
;  The five 32-bit words h0..h4 are copied in order, each with its four
;  bytes reversed (20 bytes written in total).
;  modifies: r0, r20, r21, X (r27:r26), Z (r31:r30)
sha1_ctx2hash:
	movw r26, r22		/* X = ctx */
	movw r30, r24		/* Z = destination */
	ldi r21, 5		/* word counter */
	sbiw r26, 4		/* bias X so the +8 below lands one past word 0 */
1:
	ldi r20, 4		/* byte counter */
	adiw r26, 8		/* X = one past the end of the current word */
2:
	ld r0, -X		/* read the word from its last byte down ... */
	st Z+, r0		/* ... and write it out ascending */
	dec r20
	brne 2b

	dec r21
	brne 1b

	ret

;###########################################################

.global sha1
; === sha1 ===
; this function calculates SHA-1 hashes from messages in RAM
;  param1: the 16-bit hash destination pointer
;          given in r25,r24 (r25 is most significant)
;  param2: the 16-bit pointer to message
;          given in r23,r22
;  param3: 32-bit length value (length of message in bits)
;          given in r21,r20,r19,r18
;
;  A 28 byte sha1_ctx (5*4 state bytes + 8 length bytes) is allocated on
;  the stack. Register use inside the function:
;    r17:r16 = pointer to the ctx on the stack
;    r13:r12 = current message pointer
;    r11..r8 = remaining length in bits
;  r8-r13, r16 and r17 are saved and restored; r1 is expected to be zero.
sha1:
sha1_prolog:
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r16
	push r17
	in r30, SPL
	in r31, SPH
	sbiw r30, 5*4+8		/* reserve room for the ctx */
	in r0, SREG
	cli			/* SP is updated in two steps, keep irqs out */
	out SPL, r30
	out SREG, r0
	out SPH, r31

	push r25		/* keep the destination pointer for later */
	push r24
	adiw r30, 1		/* SP points to the next free byte, ctx starts one above */
	movw r16, r30

	movw r8, r18		/* backup of length */
	movw r10, r20

	movw r12, r22		/* backup of msg-ptr */

	movw r24, r16
	rcall sha1_init
	/* While the upper 16 bits of the length are non-zero (length >= 65536)
	   hash full 64 byte blocks here. The remaining 16-bit length is handed
	   to sha1_lastBlock, which itself hashes any full blocks still left. */
1:
	tst r11
	brne 2f
	tst r10
	breq 4f
2:
	movw r24, r16
	movw r22, r12
	rcall sha1_nextBlock
	ldi r19, 64
	add r12, r19		/* msg-ptr += 64 */
	adc r13, r1
	/* length -= 512 (byte 0 of the length is unaffected) */
	ldi r19, 0x02
	sub r9, r19
	sbc r10, r1
	sbc r11, r1
	rjmp 1b

4:
	movw r24, r16
	movw r22, r12
	movw r20, r8		/* remaining length, fits in 16 bits here */
	rcall sha1_lastBlock

	pop r24			/* destination pointer saved in the prolog */
	pop r25
	movw r22, r16
	rcall sha1_ctx2hash

sha1_epilog:
	in r30, SPL
	in r31, SPH
	adiw r30, 5*4+8		/* release the ctx */
	in r0, SREG
	cli
	out SPL, r30
	out SREG, r0
	out SPH, r31
	pop r17
	pop r16
	pop r13
	pop r12
	pop r11
	pop r10
	pop r9
	pop r8
	ret

;###########################################################


; block MUST NOT be larger than 64 bytes

.global sha1_lastBlock
; === sha1_lastBlock ===
; this function does padding & Co. for calculating SHA-1 hashes
;  param1: the 16-bit pointer to sha1_ctx structure
;          given in r25,r24 (r25 is most significant)
;  param2: an 16-bit pointer to 64 byte block to hash
;          given in r23,r22
;  param3: an 16-bit integer specifing length of block in bits
;          given in r21,r20
;
;  While the length is >= 512 bits, full blocks are passed to
;  sha1_nextBlock first. The rest is copied into a 64 byte buffer on the
;  stack, the stuffing bit (a single 1 bit) and zero padding are appended
;  and the 64-bit total message length is stored big endian in the last
;  8 bytes. If the stuffing byte does not leave room for the length, one
;  extra block is hashed.
;  If the length is not a multiple of 8, the unused low bits of the last
;  message byte are masked off before the stuffing bit is merged in.
;  r1 is used as a scratch counter but is zero again on return.
sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)


sha1_lastBlock:
	cpi r21, 0x02		/* high byte >= 2  <=>  length >= 512 */
	brlo sha1_lastBlock_prolog
	push r25
	push r24
	push r23
	push r22
	push r21
	push r20
	rcall sha1_nextBlock
	pop r20
	pop r21
	pop r22
	pop r23
	pop r24
	pop r25
	subi r21, 2		/* length -= 512 */
	ldi r19, 64
	add r22, r19		/* msg-ptr += 64 */
	adc r23, r1
	rjmp sha1_lastBlock
sha1_lastBlock_prolog:
	/* allocate space on stack */
	in r30, SPL
	in r31, SPH
	in r0, SREG
	subi r30, lo8(64)
	sbci r31, hi8(64)
	cli
	out SPL, r30
	out SREG, r0
	out SPH, r31

	adiw r30, 1		/* SP points to next free byte on stack */
	mov r18, r20		/* r20 = LSB(length) */
	lsr r18
	lsr r18
	lsr r18
	/* length < 512 here, so length/8 is (low byte >> 3) plus bit 8 of the
	   length moved to bit 5; bst/bld copies that single bit via T */
	bst r21, 0
	bld r18, 5		/* now: r18 == length/8 (aka. length in bytes) */


	movw r26, r22		/* X points to begin of msg */
	tst r18
	breq sha1_lastBlock_post_copy
	mov r1, r18		/* r1 = copy counter, ends at zero again */
sha1_lastBlock_copy_loop:
	ld r0, X+
	st Z+, r0
	dec r1
	brne sha1_lastBlock_copy_loop
sha1_lastBlock_post_copy:
sha1_lastBlock_insert_stuffing_bit:
	ldi r19, 0x80
	mov r0,r19
	ldi r19, 0x07
	and r19, r20		/* if we are in bitmode */
	breq 2f			/* no bitmode */
1:
	lsr r0			/* r0 = 0x80 >> (number of trailing bits) */
	dec r19
	brne 1b
	ld r19, X		/* partial last byte of the message */
	/* keep only the valid high bits of that byte: the mask is
	   -(r0 << 1); r22 is free here, the msg-ptr lives in X */
	mov r22, r0
	lsl r22
	neg r22
	and r19, r22
	or r0, r19
2:
	st Z+, r0
	inc r18			/* r18 = bytes in buffer incl. stuffing byte */

	/* does the 8 byte length field still fit into this block? */
	cpi r18, 64-8+1
	brsh 0f
	rjmp sha1_lastBlock_insert_zeros
0:
	/* no room for the length: this block is zero filled and hashed,
	   the length goes into one more block */
	ldi r19, 64
	sub r19, r18
	breq 2f
1:
	st Z+, r1
	dec r19
	brne 1b
2:
	sbiw r30, 63
	sbiw r30,  1		/* Z back at the begin of the buffer */
	movw r22, r30

	push r31
	push r30
	push r25
	push r24
	push r21
	push r20
	rcall sha1_nextBlock
	pop r20
	pop r21
	pop r24
	pop r25
	pop r30
	pop r31

	/* sha1_nextBlock added 512 to ctx.length for this padding block,
	   so subtract 512 again */
	movw r26, r24
	adiw r26, 4*5+1		/* we can skip the lowest byte */
	ld r19, X
	subi r19, hi8(512)
	st X+, r19
	ldi r18, 6
1:
	ld r19, X
	sbci r19, 0		/* dec below leaves the carry untouched */
	st X+, r19
	dec r18
	brne 1b

	/* r18 is zero now and Z points to the begin of the buffer again,
	   so the code below fills 56 zero bytes */

sha1_lastBlock_insert_zeros:
	ldi r19, 64-8
	sub r19, r18
	breq sha1_lastBlock_insert_length
	clr r1
1:
	st Z+, r1		/* r1 is still zero */
	dec r19
	brne 1b

;	rjmp sha1_lastBlock_epilog
sha1_lastBlock_insert_length:
	movw r26, r24		/* X points to state */
	adiw r26, 5*4		/* X points to (state.length) */
	adiw r30, 8		/* Z points one after the last byte of block */
	/* total = ctx.length + remaining bits, written from the last byte
	   of the block downwards (big endian) */
	ld r0, X+
	add r0, r20
	st -Z, r0
	ld r0, X+
	adc r0, r21
	st -Z, r0
	ldi r19, 6
1:
	ld r0, X+
	adc r0, r1
	st -Z, r0
	dec r19
	brne 1b

	sbiw r30, 64-8		/* Z back at the begin of the buffer */
	movw r22, r30
	rcall sha1_nextBlock

sha1_lastBlock_epilog:
	in r30, SPL
	in r31, SPH
	in r0, SREG
	adiw r30, 63		/* release the 64 byte buffer ... */
	adiw r30,  1		/* ... adiw takes at most 63 */
	cli
	out SPL, r30
	out SREG, r0
	out SPH, r31
	clr r1
	ret

/**/
;###########################################################

.global sha1_nextBlock
; === sha1_nextBlock ===
; this is the core function for calculating SHA-1 hashes
;  param1: the 16-bit pointer to sha1_ctx structure
;          given in r25,r24 (r25 is most significant)
;  param2: an 16-bit pointer to 64 byte block to hash
;          given in r23,r22
;
;  Hashes one 64 byte block into the ctx and adds 512 to ctx.length.
;  r10-r17, r28 and r29 are saved and restored; r0, r18-r27, r30 and r31
;  are overwritten. r1 (xNULL) must be zero on entry and is cleared again
;  before returning.
;
;  Stack frame: 16 words for w[] plus 5 words for a[] (84 bytes) plus one
;  spare word, 88 bytes in total. a[] is placed directly below w[].
sha1_nextBlock_localSpace = (16+5+1)*4

/* register aliases (plain register numbers) */
xtmp = 0	/* index of the current 20-round group (0..3) */
xNULL = 1	/* always zero */
W1 = 10		/* W2:W1 = pointer to w[0] */
W2 = 11
T1	= 12	/* T4..T1 = 32-bit accumulator T */
T2	= 13
T3	= 14
T4	= 15
LoopC = 16	/* round counter t (0..79) */
S	  = 17	/* byte offset of w[t mod 16] */
tmp1 = 18
tmp2 = 19
tmp3 = 20
tmp4 = 21
F1 = 22		/* F4..F1 = 32-bit scratch value */
F2 = 23
F3 = 24
F4 = 25

/* byteorder: high number <--> high significance */
sha1_nextBlock:
	/* first make some space ready for local vars */
	/* replace push & pop by mem ops? */
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
	in r20, SPL
	in r21, SPH
	movw r18, r20			;backup SP
;	movw r26, r20			; X points to free space on stack /* maybe removeable? */
	movw r30, r22			; Z points to message
	subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63
	sbci r21, hi8(sha1_nextBlock_localSpace)
	movw r26, r20			; X points to free space on stack
	in r0, SREG
	cli ; we want to be uninterrupted while updating SP
	out SPL, r20
	out SREG, r0
	out SPH, r21

	push r18
	push r19 /* push old SP on new stack */
	push r24
	push r25 /* param1 will be needed later */

	/* load a[] with state */
	movw r28, r24 /* load pointer to state in Y */
	adiw r26, 1 ; X++

	ldi LoopC, 5*4
1:	ld tmp1, Y+
	st X+, tmp1
	dec LoopC
	brne 1b

	movw W1, r26 /* save pointer to w[0] */
	/* load w[] with endian fixed message */
	/* we might also use the changeendian32() function at bottom */
	movw r30, r22 /* mv param2 (pointer to msg) to Z */
	ldi LoopC, 16
1:
	ldd tmp1, Z+3
	st X+, tmp1
	ldd tmp1, Z+2
	st X+, tmp1
	ldd tmp1, Z+1
	st X+, tmp1
	ld tmp1, Z
	st X+, tmp1
	adiw r30, 4
	dec LoopC
	brne 1b

	/* LoopC is zero after the loop above, so no clr is needed.
	   LoopC is named t in FIPS 180-2 */
	clr xtmp
sha1_nextBlock_mainloop:
	mov S, LoopC
	lsl S
	lsl S
	andi S, 0x3C /* S is a bytepointer so *4 */
	/* load w[s] */
	movw r26, W1
	add r26, S /* X points at w[s] */
	adc r27, xNULL
	ld T1, X+
	ld T2, X+
	ld T3, X+
	ld T4, X+

/*
	push r26
	push r27
	push T4
	push T3
	push T2
	push T1
	in r26, SPL
	in r27, SPH
	adiw r26, 1
	dbg_hexdump 4
	pop T1
	pop T2
	pop T3
	pop T4
	pop r27
	pop r26
*/

	cpi LoopC, 16
	brlt sha1_nextBlock_mainloop_core
	/* update w[s]: xor in w[s+2], w[s+8] and w[s+13] (indices mod 16) */
	ldi tmp1, 2*4
	rcall 1f
	ldi tmp1, 8*4
	rcall 1f
	ldi tmp1, 13*4
	rcall 1f
	rjmp 2f
1:		/* this might be "outsourced" to save the jump above */
	add tmp1, S
	andi tmp1, 0x3f
	movw r26, W1
	add r26, tmp1
	adc r27, xNULL
	ld tmp2, X+
	eor T1, tmp2
	ld tmp2, X+
	eor T2, tmp2
	ld tmp2, X+
	eor T3, tmp2
	ld tmp2, X+
	eor T4, tmp2
	ret
2:	/* now we just have to do a ROTL(T) and save T back */
	mov tmp2, T4
	rol tmp2		/* carry = top bit of T, rotated into bit 0 below */
	rol T1
	rol T2
	rol T3
	rol T4
	movw r26, W1
	add r26, S
	adc r27, xNULL
	st X+, T1
	st X+, T2
	st X+, T3
	st X+, T4

sha1_nextBlock_mainloop_core:	/* the core function; T=ROTL5(a) ....*/
								/* T already contains w[s] */
	movw r26, W1
	sbiw r26, 4*1		/* X points at a[4] aka e */
	ld tmp1, X+
	add T1, tmp1
	ld tmp1, X+
	adc T2, tmp1
	ld tmp1, X+
	adc T3, tmp1
	ld tmp1, X+
	adc T4, tmp1		/* T = w[s]+e */
	sbiw r26, 4*5		/* X points at a[0] aka a */
	ld F1, X+
	ld F2, X+
	ld F3, X+
	ld F4, X+
	mov tmp1, F4		/* X points at a[1] aka b */
	ldi tmp2, 5
1:
	rol tmp1		/* tmp1 shadows F4 and supplies the wrap-around bit */
	rol F1
	rol F2
	rol F3
	rol F4
	dec tmp2
	brne 1b

	add T1, F1
	adc T2, F2
	adc T3, F3
	adc T4, F4 /* T = ROTL(a,5) + e + w[s] */

	/* select the constant and function for this round: xtmp is bumped
	   whenever LoopC reaches the next entry of xTable (20, 40, 60) */
	ldi r30, lo8(sha1_nextBlock_xTable)
	ldi r31, hi8(sha1_nextBlock_xTable)
	add r30, xtmp
	adc r31, xNULL
	lpm tmp1, Z
	cp tmp1, LoopC
	brne 1f
	inc xtmp
1:	ldi r30, lo8(sha1_nextBlock_KTable)
	ldi r31, hi8(sha1_nextBlock_KTable)
	lsl xtmp
	lsl xtmp
	add r30, xtmp
	adc r31, xNULL
	lsr xtmp
	lsr xtmp

	lpm tmp1, Z+
	add T1, tmp1
	lpm tmp1, Z+
	adc T2, tmp1
	lpm tmp1, Z+
	adc T3, tmp1
	lpm tmp1, Z+
	adc T4, tmp1
			/* T = ROTL(a,5) + e + kt + w[s] */

	/* Z-4 is just pointing to kt ... */
	movw r28, r26 /* copy X in Y */
	adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */
	lsr r31		/* byte address -> word address for icall */
	ror r30

	/* one call per result byte; each call advances Y by one */
	icall
	mov F1, tmp1
	icall
	mov F2, tmp1
	icall
	mov F3, tmp1
	icall

	add T1, F1
	adc T2, F2
	adc T3, F3
	adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */
				 /* X points still at a[1] aka b, Y points at a[2] aka c */
	/* update a[] */
sha1_nextBlock_update_a:
	/*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/
	/* (disabled) adiw r28, 3*4 -- Y should point at a[4] aka e */
	movw r28, W1
	sbiw r28, 4

	ldi tmp2, 4*4
1:
	ld tmp1, -Y
	std Y+4, tmp1
	dec tmp2
	brne 1b
	/* Y points at a[0] aka a*/

	movw r28, W1
	sbiw r28, 5*4
	/* store T in a[0] aka a */
	st Y+, T1
	st Y+, T2
	st Y+, T3
	st Y+, T4
	/* Y points at a[1] aka b*/

	/* rotate c right by 2 (same as ROTL 30) */
	ldd T1, Y+1*4
	ldd T2, Y+1*4+1
	ldd T3, Y+1*4+2
	ldd T4, Y+1*4+3
	mov tmp1, T1
	ldi tmp2, 2
1:	ror tmp1		/* tmp1 shadows T1 and supplies the wrap-around bit */
	ror T4
	ror T3
	ror T2
	ror T1
	dec tmp2
	brne 1b
	std Y+1*4+0, T1
	std Y+1*4+1, T2
	std Y+1*4+2, T3
	std Y+1*4+3, T4
/*
	push r27
	push r26
	movw r26, W1
	sbiw r26, 4*5
	dbg_hexdump 4*5
	pop r26
	pop r27
*/
	inc LoopC
	cpi LoopC, 80
	brge 1f
	rjmp sha1_nextBlock_mainloop
/**************************************/
1:
	/* little patch: move Y from a[1] back to a[0] */
	sbiw r28, 4

/* add a[] to state and inc length */
	pop r27
	pop r26		/* now X points to state (and Y still at a[0]) */
	ldi tmp4, 5
1:	clc
	ldi tmp3, 4
2:	ld tmp1, X
	ld tmp2, Y+
	adc tmp1, tmp2
	st X+, tmp1
	dec tmp3		/* dec leaves the carry untouched */
	brne 2b
	dec tmp4
	brne 1b

	/* now length += 512 */
	adiw r26, 1 /* we skip the least significant byte */
	ld tmp1, X
	ldi tmp2, hi8(512) /* 2 */
	add tmp1, tmp2
	st X+, tmp1
	ldi tmp2, 6
1:
	ld tmp1, X
	adc tmp1, xNULL
	st X+, tmp1
	dec tmp2
	brne 1b

; EPILOG
sha1_nextBlock_epilog:
/* now we should clean up the stack */
	pop r21
	pop r20		/* r21:r20 = SP value saved in the prolog */
	in r0, SREG
	cli ; we want to be uninterrupted while updating SP
	out SPL, r20
	out SREG, r0
	out SPH, r21

	clr r1
	pop r29
	pop r28
	pop r17
	pop r16
	pop r15
	pop r14
	pop r13
	pop r12
	pop r11
	pop r10
	ret

/* round numbers at which the next constant/function is selected */
sha1_nextBlock_xTable:
.byte 20,40,60,0
/* round constants; the jump table must follow directly (see the
   "adiw r30, 3*4" above) */
sha1_nextBlock_KTable:
.int	0x5a827999
.int	0x6ed9eba1
.int	0x8f1bbcdc
.int	0xca62c1d6
/* one 4 byte entry (rjmp + nop) per 20-round group */
sha1_nextBlock_JumpTable:
rjmp sha1_nextBlock_Ch
nop
rjmp sha1_nextBlock_Parity
nop
rjmp sha1_nextBlock_Maj
nop
rjmp sha1_nextBlock_Parity

	 /* Y points at the current byte of a[1] aka b and is advanced by
	    one; return value in tmp1 */
sha1_nextBlock_Ch:
	ld tmp1, Y+
	mov tmp2, tmp1
	com tmp2
	ldd tmp3, Y+3	/* load from c */
	and tmp1, tmp3
	ldd tmp3, Y+7	/* load from d */
	and tmp2, tmp3
	eor tmp1, tmp2
	ret

sha1_nextBlock_Maj:
	ld tmp1, Y+
	mov tmp2, tmp1
	ldd tmp3, Y+3	/* load from c */
	and tmp1, tmp3
	ldd tmp4, Y+7	/* load from d */
	and tmp2, tmp4
	eor tmp1, tmp2
	and tmp3, tmp4
	eor tmp1, tmp3
	ret

sha1_nextBlock_Parity:
	ld tmp1, Y+
	ldd tmp2, Y+3	/* load from c */
	eor tmp1, tmp2
	ldd tmp2, Y+7	/* load from d */
	eor tmp1, tmp2
	ret
/*
ch_str:			.asciz "\r\nCh"
maj_str:		.asciz "\r\nMaj"
parity_str:		.asciz "\r\nParity"
*/
;###########################################################

.global sha1_init
;void sha1_init(sha1_ctx_t *state){
;	DEBUG_S("\r\nSHA1_INIT");
;	state->h[0] = 0x67452301;
;	state->h[1] = 0xefcdab89;
;	state->h[2] = 0x98badcfe;
;	state->h[3] = 0x10325476;
;	state->h[4] = 0xc3d2e1f0;
;	state->length = 0;
;}
; param1: 16-bit pointer to sha1_ctx_t struct in ram,
;         given in r25,r24 (r25 is most significant)
; modifies: X (r27:r26), Z (r31:r30), r22, r23
; expects r1 to be zero (it is used to clear the length field)
sha1_init:
	movw r26, r24 ; (24,25) --> (26,27) load X with param1
	ldi r30, lo8((sha1_init_vector))
	ldi r31, hi8((sha1_init_vector))
	ldi r22, 5*4 /* bytes to copy */
sha1_init_vloop:
	lpm r23, Z+		/* the initial values live in program memory */
	st X+, r23
	dec r22
	brne sha1_init_vloop
	ldi r22, 8		/* the 64-bit length field follows the state */
sha1_init_lloop:
	st X+, r1
	dec r22
	brne sha1_init_lloop
	ret

sha1_init_vector:
.int 0x67452301
.int 0xefcdab89
.int 0x98badcfe
.int 0x10325476
.int 0xc3d2e1f0