Mercurial > pihelp
comparison sha1-asm.S @ 0:8705acff2494
lots of stuff
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Sat, 01 Jun 2013 01:38:42 +0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8705acff2494 |
---|---|
1 /* sha1-asm.S */ | |
2 /* | |
3 This file is part of the AVR-Crypto-Lib. | |
4 Copyright (C) 2008 Daniel Otte ([email protected]) | |
5 | |
6 This program is free software: you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation, either version 3 of the License, or | |
9 (at your option) any later version. | |
10 | |
11 This program is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 */ | |
19 /* | |
20 * Author: Daniel Otte | |
21 * | |
22 * License: GPLv3 or later | |
23 */ | |
/* SHA-1 implementation in assembler for AVR */
SHA1_BLOCK_BITS = 512           /* size of one message block in bits */
SHA1_HASH_BITS  = 160           /* size of the resulting hash in bits */
27 | |
/*
 * precall: push r0, r1, r18-r27 and r30-r31 onto the stack and then clear r1.
 * The matching restore sequence is the postcall macro.
 */
.macro precall
        push    r0
        push    r1
        push    r18
        push    r19
        push    r20
        push    r21
        push    r22
        push    r23
        push    r24
        push    r25
        push    r26
        push    r27
        push    r30
        push    r31
        clr     r1                      /* r1 = 0 for the code that follows */
.endm
46 | |
/*
 * postcall: pop r31, r30, r27-r18, r1 and r0 from the stack, i.e. the
 * registers pushed by precall, in reverse order.
 */
.macro postcall
        pop     r31
        pop     r30
        pop     r27
        pop     r26
        pop     r25
        pop     r24
        pop     r23
        pop     r22
        pop     r21
        pop     r20
        pop     r19
        pop     r18
        pop     r1
        pop     r0
.endm
63 | |
64 | |
/*
 * hexdump length
 *
 * Debug helper. Sends CR and LF through uart_putc, then hands the X pointer
 * (in r25:r24) and a byte count (in r23:r22) to uart_hexdump. At most 16
 * bytes are passed per uart_hexdump call; for longer regions X is advanced
 * by 16 and the macro expands itself again with length-16.
 * uart_putc and uart_hexdump are defined outside this file; X is saved
 * around every call except the last one.
 */
.macro hexdump length
        /* line break first; keep X safe across the two uart_putc calls */
        push    r27
        push    r26
        ldi     r25, '\r'
        mov     r24, r25
        call    uart_putc
        ldi     r25, '\n'
        mov     r24, r25
        call    uart_putc
        pop     r26
        pop     r27
        movw    r24, r26                /* first argument: address taken from X */
.if \length > 16
        /* more than one row left: dump 16 bytes, then recurse */
        ldi     r22, lo8(16)
        ldi     r23, hi8(16)
        push    r27
        push    r26
        call    uart_hexdump
        pop     r26
        pop     r27
        adiw    r26, 16
        hexdump \length-16
.else
        /* final row: dump the remaining bytes */
        ldi     r22, lo8(\length)
        ldi     r23, hi8(\length)
        call    uart_hexdump
.endif
.endm
93 | |
/*
 * delay: currently expands to nothing. The disabled body below is a nested
 * busy-wait loop counting in r0 and r1; it is kept for reference only.
 */
.macro delay
/*
        push    r0
        push    r1
        clr     r0
1:      clr     r1
2:      dec     r1
        brne    2b
        dec     r0
        brne    1b
        pop     r1
        pop     r0
*/
.endm
107 | |
108 /* X points to Block */ | |
/*
 * dbg_hexdump length: currently expands to nothing. The disabled body wraps
 * the hexdump macro in precall/postcall so that a dump leaves the saved
 * registers untouched.
 */
.macro dbg_hexdump length
/*
        precall
        hexdump \length
        postcall
*/
.endm
116 | |
117 | |
118 | |
.section .text

/* I/O addresses used with in/out when the stack pointer is moved */
SPL  = 0x3D                     /* stack pointer, low byte */
SPH  = 0x3E                     /* stack pointer, high byte */
SREG = 0x3F                     /* status register */
124 | |
125 | |
126 ; | |
127 ;sha1_ctx_t is: | |
128 ; | |
129 ; [h0][h1][h2][h3][h4][length] | |
130 ; hn is 32 bit large, length is 64 bit large | |
131 | |
132 ;########################################################### | |
133 | |
.global sha1_ctx2hash
/*
 * === sha1_ctx2hash ===
 * Converts a state into a normal hash (byte string): copies five 32-bit
 * words (20 bytes) from the sha1_ctx structure to the destination and
 * reverses the byte order inside every word.
 *   param1: 16-bit destination pointer in r25:r24 (r25 is most significant)
 *   param2: 16-bit pointer to the sha1_ctx structure in r23:r22
 *   modifies: r0, r20, r21, X (r27:r26), Z (r31:r30)
 */
sha1_ctx2hash:
        movw    r30, r24                /* Z = destination */
        movw    r26, r22                /* X = ctx */
        sbiw    r26, 4                  /* bias X so that +8 lands one past word 0 */
        ldi     r21, 5                  /* words left */
sha1_ctx2hash_next_word:
        adiw    r26, 8                  /* X = one past the end of the current word */
        ldi     r20, 4                  /* bytes left in this word */
sha1_ctx2hash_next_byte:
        ld      r0, -X                  /* walk the word from its last byte down */
        st      Z+, r0
        dec     r20
        brne    sha1_ctx2hash_next_byte

        dec     r21
        brne    sha1_ctx2hash_next_word

        ret
159 | |
160 ;########################################################### | |
161 | |
.global sha1
/*
 * === sha1 ===
 * Calculates the SHA-1 hash of a message in RAM.
 *   param1: 16-bit hash destination pointer in r25:r24 (r25 is most significant)
 *   param2: 16-bit pointer to the message in r23:r22
 *   param3: 32-bit length of the message in bits in r21,r20,r19,r18
 *
 * A 5*4+8 byte sha1_ctx is allocated on the stack. Register roles after the
 * prolog:
 *   r17:r16  pointer to the ctx on the stack
 *   r13:r12  pointer to the not yet hashed part of the message
 *   r11..r8  remaining length in bits
 * r8-r13, r16 and r17 are saved and restored by this function.
 */
sha1:
sha1_prolog:
        push    r8
        push    r9
        push    r10
        push    r11
        push    r12
        push    r13
        push    r16
        push    r17
        /* reserve room for the ctx: SP -= 5*4+8 */
        in      r30, SPL
        in      r31, SPH
        sbiw    r30, 5*4+8
        in      r0, SREG
        cli                             /* no interrupts while SP is half updated */
        out     SPL, r30
        out     SREG, r0
        out     SPH, r31

        push    r25                     /* keep the destination pointer for later */
        push    r24
        adiw    r30, 1                  /* Z = first byte of the reserved area */
        movw    r16, r30                /* r17:r16 = ctx */

        movw    r8, r18                 /* r11..r8 = length in bits */
        movw    r10, r20

        movw    r12, r22                /* r13:r12 = message pointer */

        movw    r24, r16
        rcall   sha1_init

        /*
         * While the upper 16 bits of the length are non-zero (length >= 65536
         * bits) hash one 64 byte block at a time. Whatever is left fits into
         * 16 bits and is handed to sha1_lastBlock.
         */
sha1_check_length:
        tst     r11
        brne    sha1_hash_block
        tst     r10
        breq    sha1_finish
sha1_hash_block:
        movw    r24, r16
        movw    r22, r12
        rcall   sha1_nextBlock
        ldi     r19, 64                 /* message pointer += 64 bytes */
        add     r12, r19
        adc     r13, r1                 /* r1 is zero on return from sha1_nextBlock */
        ldi     r19, 0x02               /* length -= 512 (0x0200) */
        sub     r9, r19
        sbc     r10, r1
        sbc     r11, r1
        rjmp    sha1_check_length

sha1_finish:
        movw    r24, r16                /* ctx */
        movw    r22, r12                /* rest of the message */
        movw    r20, r8                 /* low 16 bits of the remaining length */
        rcall   sha1_lastBlock

        pop     r24                     /* destination pointer saved in the prolog */
        pop     r25
        movw    r22, r16
        rcall   sha1_ctx2hash

sha1_epilog:
        /* release the ctx: SP += 5*4+8 */
        in      r30, SPL
        in      r31, SPH
        adiw    r30, 5*4+8
        in      r0, SREG
        cli
        out     SPL, r30
        out     SREG, r0
        out     SPH, r31
        pop     r17
        pop     r16
        pop     r13
        pop     r12
        pop     r11
        pop     r10
        pop     r9
        pop     r8
        ret
251 | |
252 ;########################################################### | |
253 | |
254 | |
255 ; block MUST NOT be larger than 64 bytes | |
256 | |
.global sha1_lastBlock
/*
 * === sha1_lastBlock ===
 * Does the padding & co. for calculating SHA-1 hashes: hashes the tail of a
 * message, appends the stuffing bit, the zero padding and the 64-bit message
 * length, and feeds the resulting block(s) to sha1_nextBlock.
 *   param1: 16-bit pointer to the sha1_ctx structure in r25:r24
 *           (r25 is most significant)
 *   param2: 16-bit pointer to the data to hash in r23:r22
 *   param3: 16-bit length of the data in bits in r21:r20
 *
 * While the length is 512 bits or more, complete 64 byte blocks are passed
 * to sha1_nextBlock first. The remaining (< 512) bits are copied into a
 * 64 byte buffer on the stack and padded there.
 *
 * If the length is not a multiple of 8, only the top (length % 8) bits of
 * the final byte belong to the message. That byte is masked before the
 * stuffing bit is merged in, so stray low bits in the caller's buffer cannot
 * leak into the padding.
 *
 * r1 is expected to be zero on entry and is zero again on return.
 */
sha1_lastBlock_localSpace = (SHA1_BLOCK_BITS/8+1)


sha1_lastBlock:
        cpi     r21, 0x02               /* length >= 512 (0x0200)? */
        brlo    sha1_lastBlock_prolog
        /* hash one complete block; the arguments must survive the call */
        push    r25
        push    r24
        push    r23
        push    r22
        push    r21
        push    r20
        rcall   sha1_nextBlock
        pop     r20
        pop     r21
        pop     r22
        pop     r23
        pop     r24
        pop     r25
        subi    r21, 2                  /* length -= 512 */
        ldi     r19, 64                 /* data pointer += 64 */
        add     r22, r19
        adc     r23, r1
        rjmp    sha1_lastBlock
sha1_lastBlock_prolog:
        /* allocate 64 bytes on the stack */
        in      r30, SPL
        in      r31, SPH
        in      r0, SREG
        subi    r30, lo8(64)
        sbci    r31, hi8(64)
        cli
        out     SPL, r30
        out     SREG, r0
        out     SPH, r31

        adiw    r30, 1                  /* SP points to the next free byte, buffer starts one above */
        /* r18 = length / 8: bits 7..3 of r20 give bits 4..0, bit 0 of r21 gives bit 5 */
        mov     r18, r20
        lsr     r18
        lsr     r18
        lsr     r18
        bst     r21, 0
        bld     r18, 5                  /* now: r18 == length in complete bytes */


        movw    r26, r22                /* X points to the begin of the data */
        tst     r18
        breq    sha1_lastBlock_post_copy
        mov     r1, r18                 /* r1 is the loop counter and ends up zero again */
sha1_lastBlock_copy_loop:
        ld      r0, X+
        st      Z+, r0
        dec     r1
        brne    sha1_lastBlock_copy_loop
sha1_lastBlock_post_copy:
sha1_lastBlock_insert_stuffing_bit:
        ldi     r19, 0x80
        mov     r0, r19                 /* r0 = stuffing bit for the byte aligned case */
        ldi     r19, 0x07
        and     r19, r20                /* r19 = n = number of bits in the partial byte */
        breq    2f                      /* n == 0: no bit mode */
1:
        lsr     r0                      /* r0 = 0x80 >> n after the loop */
        dec     r19
        brne    1b
        ld      r19, X                  /* the partial byte */
        /* keep only its top n bits: mask = -(r0 << 1) = 0x100 - (0x100 >> n) */
        mov     r22, r0                 /* r22 is free: X already holds the data pointer */
        lsl     r22
        neg     r22
        and     r19, r22
        or      r0, r19
2:
        st      Z+, r0
        inc     r18                     /* r18 = bytes used in the buffer */

        /* is there still room for the 8 byte length field? */
        cpi     r18, 64-8+1
        brsh    0f
        rjmp    sha1_lastBlock_insert_zeros
0:
        /* no room: pad this block with zeros and hash it on its own */
        ldi     r19, 64
        sub     r19, r18
        breq    2f
1:
        st      Z+, r1
        dec     r19
        brne    1b
2:
        sbiw    r30, 63                 /* Z back to the begin of the buffer (64 in total) */
        sbiw    r30, 1
        movw    r22, r30

        push    r31
        push    r30
        push    r25
        push    r24
        push    r21
        push    r20
        rcall   sha1_nextBlock
        pop     r20
        pop     r21
        pop     r24
        pop     r25
        pop     r30
        pop     r31

        /*
         * sha1_nextBlock added 512 to ctx.length, but the bits of this block
         * are added from r21:r20 further down, so take the 512 off again.
         */
        movw    r26, r24
        adiw    r26, 4*5+1              /* the lowest length byte can be skipped */
        ld      r19, X
        subi    r19, hi8(512)
        st      X+, r19
        ldi     r18, 6
1:
        ld      r19, X
        sbci    r19, 0                  /* propagate the borrow */
        st      X+, r19
        dec     r18
        brne    1b

        /* r18 is zero here and Z points to the begin of the buffer again */

sha1_lastBlock_insert_zeros:
        ldi     r19, 64-8
        sub     r19, r18                /* number of zero bytes up to the length field */
        breq    sha1_lastBlock_insert_length
        clr     r1
1:
        st      Z+, r1
        dec     r19
        brne    1b

sha1_lastBlock_insert_length:
        /*
         * Write ctx.length + r21:r20 into the last 8 bytes of the buffer. The
         * ctx is read upwards from its lowest byte while the buffer is written
         * downwards from its end.
         */
        movw    r26, r24                /* X points to the state */
        adiw    r26, 5*4                /* X points to state.length */
        adiw    r30, 8                  /* Z points one after the last byte of the block */
        ld      r0, X+
        add     r0, r20
        st      -Z, r0
        ld      r0, X+
        adc     r0, r21
        st      -Z, r0
        ldi     r19, 6
1:
        ld      r0, X+
        adc     r0, r1                  /* r1 is zero, only the carry is added */
        st      -Z, r0
        dec     r19
        brne    1b

        sbiw    r30, 64-8               /* Z = begin of the buffer */
        movw    r22, r30
        rcall   sha1_nextBlock

sha1_lastBlock_epilog:
        /* release the 64 byte buffer */
        in      r30, SPL
        in      r31, SPH
        in      r0, SREG
        adiw    r30, 63
        adiw    r30, 1
        cli
        out     SPL, r30
        out     SREG, r0
        out     SPH, r31
        clr     r1
        ret
433 | |
434 /**/ | |
435 ;########################################################### | |
436 | |
437 .global sha1_nextBlock | |
438 ; === sha1_nextBlock === | |
439 ; this is the core function for calculating SHA-1 hashes | |
440 ; param1: the 16-bit pointer to sha1_ctx structure | |
441 ; given in r25,r24 (r25 is most significant) | |
442 ; param2: an 16-bit pointer to 64 byte block to hash | |
443 ; given in r23,r22 | |
; Overview of what the code below does:
;  - saves r10-r17, r28, r29 and reserves sha1_nextBlock_localSpace bytes on the stack
;  - copies the 5 state words into a[] and the 16 byte-swapped message words into w[]
;  - runs 80 rounds (LoopC = 0..79), keeping w[] as a 16 word ring buffer
;  - adds a[] to the state, adds 512 to the length field, restores SP and registers
; xNULL (r1) is used as a zero register throughout and is cleared again before returning.
; r0 and r18-r27, r30, r31 are used without being restored.
444 sha1_nextBlock_localSpace = (16+5+1)*4 ; 16 32-bit values for the w array, 5 32-bit values for the a array (84 bytes) plus one spare 32-bit slot (88 bytes in total) | |
445 | |
; register aliases (plain register numbers)
446 xtmp = 0 | |
447 xNULL = 1 | |
448 W1 = 10 | |
449 W2 = 11 | |
450 T1 = 12 | |
451 T2 = 13 | |
452 T3 = 14 | |
453 T4 = 15 | |
454 LoopC = 16 | |
455 S = 17 | |
456 tmp1 = 18 | |
457 tmp2 = 19 | |
458 tmp3 = 20 | |
459 tmp4 = 21 | |
460 F1 = 22 | |
461 F2 = 23 | |
462 F3 = 24 | |
463 F4 = 25 | |
464 | |
465 /* byteorder: high number <--> high significance */ | |
466 sha1_nextBlock: | |
467 ; first, make some space ready for local vars | |
468 /* replace push & pop by mem ops? */ | |
469 push r10 | |
470 push r11 | |
471 push r12 | |
472 push r13 | |
473 push r14 | |
474 push r15 | |
475 push r16 | |
476 push r17 | |
477 push r28 | |
478 push r29 | |
479 in r20, SPL | |
480 in r21, SPH | |
481 movw r18, r20 ;backup SP | |
482 ; movw r26, r20 ; X points to free space on stack /* maybe removeable? */ | |
483 movw r30, r22 ; Z points to message | |
484 subi r20, lo8(sha1_nextBlock_localSpace) ;sbiw can do only up to 63 | |
485 sbci r21, hi8(sha1_nextBlock_localSpace) | |
486 movw r26, r20 ; X points to free space on stack | |
487 in r0, SREG | |
488 cli ; we want to be uninterrupted while updating SP | |
489 out SPL, r20 | |
490 out SREG, r0 | |
491 out SPH, r21 | |
492 | |
493 push r18 | |
494 push r19 /* push old SP on new stack */ | |
495 push r24 | |
496 push r25 /* param1 will be needed later */ | |
497 | |
498 /* load a[] with state */ | |
499 movw 28, r24 /* load pointer to state in Y */ | |
500 adiw r26, 1 ; X++ | |
501 | |
502 ldi LoopC, 5*4 | |
503 1: ld tmp1, Y+ | |
504 st X+, tmp1 | |
505 dec LoopC | |
506 brne 1b | |
507 | |
508 movw W1, r26 /* save pointer to w[0] */ | |
509 /* load w[] with endian fixed message */ | |
510 /* we might also use the changeendian32() function at bottom */ | |
511 movw r30, r22 /* mv param2 (pointer to msg) to Z */ | |
512 ldi LoopC, 16 | |
513 1: | |
514 ldd tmp1, Z+3 | |
515 st X+, tmp1 | |
516 ldd tmp1, Z+2 | |
517 st X+, tmp1 | |
518 ldd tmp1, Z+1 | |
519 st X+, tmp1 | |
520 ld tmp1, Z | |
521 st X+, tmp1 | |
522 adiw r30, 4 | |
523 dec LoopC | |
524 brne 1b | |
525 | |
; LoopC is zero here because the copy loop above counted it down to zero
526 ;clr LoopC /* LoopC is named t in FIPS 180-2 */ | |
527 clr xtmp | |
528 sha1_nextBlock_mainloop: | |
529 mov S, LoopC | |
530 lsl S | |
531 lsl S | |
532 andi S, 0x3C /* S is a bytepointer so *4 */ | |
533 /* load w[s] */ | |
534 movw r26, W1 | |
535 add r26, S /* X points at w[s] */ | |
536 adc r27, xNULL | |
537 ld T1, X+ | |
538 ld T2, X+ | |
539 ld T3, X+ | |
540 ld T4, X+ | |
541 | |
542 /* | |
543 push r26 | |
544 push r27 | |
545 push T4 | |
546 push T3 | |
547 push T2 | |
548 push T1 | |
549 in r26, SPL | |
550 in r27, SPH | |
551 adiw r26, 1 | |
552 dbg_hexdump 4 | |
553 pop T1 | |
554 pop T2 | |
555 pop T3 | |
556 pop T4 | |
557 pop r27 | |
558 pop r26 | |
559 */ | |
560 | |
561 cpi LoopC, 16 | |
562 brlt sha1_nextBlock_mainloop_core | |
563 /* update w[s]: T ^= w[s+2] ^ w[s+8] ^ w[s+13] (indices modulo 16) */ | |
564 ldi tmp1, 2*4 | |
565 rcall 1f | |
566 ldi tmp1, 8*4 | |
567 rcall 1f | |
568 ldi tmp1, 13*4 | |
569 rcall 1f | |
570 rjmp 2f | |
571 1: /* local subroutine: T ^= word at byte offset (tmp1+S) mod 64 in w[]; this might be "outsourced" to save the jump above */ | |
572 add tmp1, S | |
573 andi tmp1, 0x3f | |
574 movw r26, W1 | |
575 add r26, tmp1 | |
576 adc r27, xNULL | |
577 ld tmp2, X+ | |
578 eor T1, tmp2 | |
579 ld tmp2, X+ | |
580 eor T2, tmp2 | |
581 ld tmp2, X+ | |
582 eor T3, tmp2 | |
583 ld tmp2, X+ | |
584 eor T4, tmp2 | |
585 ret | |
586 2: /* now we just have to do a ROTL(T) by one bit and save T back */ | |
587 mov tmp2, T4 | |
588 rol tmp2 | |
589 rol T1 | |
590 rol T2 | |
591 rol T3 | |
592 rol T4 | |
593 movw r26, W1 | |
594 add r26, S | |
595 adc r27, xNULL | |
596 st X+, T1 | |
597 st X+, T2 | |
598 st X+, T3 | |
599 st X+, T4 | |
600 | |
601 sha1_nextBlock_mainloop_core: /* the core function; T=ROTL5(a) ....*/ | |
602 /* T already contains w[s] */ | |
603 movw r26, W1 | |
604 sbiw r26, 4*1 /* X points at a[4] aka e */ | |
605 ld tmp1, X+ | |
606 add T1, tmp1 | |
607 ld tmp1, X+ | |
608 adc T2, tmp1 | |
609 ld tmp1, X+ | |
610 adc T3, tmp1 | |
611 ld tmp1, X+ | |
612 adc T4, tmp1 /* T = w[s]+e */ | |
613 sbiw r26, 4*5 /* X points at a[0] aka a */ | |
614 ld F1, X+ | |
615 ld F2, X+ | |
616 ld F3, X+ | |
617 ld F4, X+ | |
618 mov tmp1, F4 /* X points at a[1] aka b */ | |
619 ldi tmp2, 5 | |
620 1: | |
621 rol tmp1 | |
622 rol F1 | |
623 rol F2 | |
624 rol F3 | |
625 rol F4 | |
626 dec tmp2 | |
627 brne 1b | |
628 | |
629 add T1, F1 | |
630 adc T2, F2 | |
631 adc T3, F3 | |
632 adc T4, F4 /* T = ROTL(a,5) + e + w[s] */ | |
633 | |
634 /* now select the round constant and round function: xtmp is advanced whenever LoopC reaches the next xTable entry */ | |
635 ldi r30, lo8(sha1_nextBlock_xTable) | |
636 ldi r31, hi8(sha1_nextBlock_xTable) | |
637 add r30, xtmp | |
638 adc r31, xNULL | |
639 lpm tmp1, Z | |
640 cp tmp1, LoopC | |
641 brne 1f | |
642 inc xtmp | |
643 1: ldi r30, lo8(sha1_nextBlock_KTable) | |
644 ldi r31, hi8(sha1_nextBlock_KTable) | |
645 lsl xtmp | |
646 lsl xtmp | |
647 add r30, xtmp | |
648 adc r31, xNULL | |
649 lsr xtmp | |
650 lsr xtmp | |
651 | |
652 lpm tmp1, Z+ | |
653 add T1, tmp1 | |
654 lpm tmp1, Z+ | |
655 adc T2, tmp1 | |
656 lpm tmp1, Z+ | |
657 adc T3, tmp1 | |
658 lpm tmp1, Z+ | |
659 adc T4, tmp1 | |
660 /* T = ROTL(a,5) + e + kt + w[s] */ | |
661 | |
662 /* Z-4 is just pointing to kt ... */ | |
663 movw r28, r26 /* copy X in Y */ | |
664 adiw r30, 3*4 /* now Z points to the right location in our jump-vector-table */ | |
; Z is a byte address at this point; icall takes a word address, hence the shift right by one
665 lsr r31 | |
666 ror r30 | |
667 | |
; one call per byte of f_t(b,c,d); each call returns its byte in tmp1 and advances Y by one
668 icall | |
669 mov F1, tmp1 | |
670 icall | |
671 mov F2, tmp1 | |
672 icall | |
673 mov F3, tmp1 | |
674 icall | |
675 | |
676 add T1, F1 | |
677 adc T2, F2 | |
678 adc T3, F3 | |
679 adc T4, tmp1 /* T = ROTL5(a) + f_t(b,c,d) + e + k_t + w[s] */ | |
680 /* X points still at a[1] aka b, Y points at a[2] aka c */ | |
681 /* update a[] */ | |
682 sha1_nextBlock_update_a: | |
683 /*first we move all vars in a[] "one up" e=d, d=c, c=b, b=a*/ | |
684 //adiw r28, 3*4 /* Y should point at a[4] aka e */ | |
685 movw r28, W1 | |
686 sbiw r28, 4 | |
687 | |
688 ldi tmp2, 4*4 | |
689 1: | |
690 ld tmp1, -Y | |
691 std Y+4, tmp1 | |
692 dec tmp2 | |
693 brne 1b | |
694 /* Y points at a[0] aka a*/ | |
695 | |
696 movw r28, W1 | |
697 sbiw r28, 5*4 | |
698 /* store T in a[0] aka a */ | |
699 st Y+, T1 | |
700 st Y+, T2 | |
701 st Y+, T3 | |
702 st Y+, T4 | |
703 /* Y points at a[1] aka b*/ | |
704 | |
705 /* rotate c right by two bits (the same as ROTL by 30) */ | |
706 ldd T1, Y+1*4 | |
707 ldd T2, Y+1*4+1 | |
708 ldd T3, Y+1*4+2 | |
709 ldd T4, Y+1*4+3 | |
710 mov tmp1, T1 | |
711 ldi tmp2, 2 | |
712 1: ror tmp1 | |
713 ror T4 | |
714 ror T3 | |
715 ror T2 | |
716 ror T1 | |
717 dec tmp2 | |
718 brne 1b | |
719 std Y+1*4+0, T1 | |
720 std Y+1*4+1, T2 | |
721 std Y+1*4+2, T3 | |
722 std Y+1*4+3, T4 | |
723 /* | |
724 push r27 | |
725 push r26 | |
726 movw r26, W1 | |
727 sbiw r26, 4*5 | |
728 dbg_hexdump 4*5 | |
729 pop r26 | |
730 pop r27 | |
731 */ | |
732 inc LoopC | |
733 cpi LoopC, 80 | |
734 brge 1f | |
735 rjmp sha1_nextBlock_mainloop | |
736 /**************************************/ | |
737 1: | |
738 /* little patch: move Y from a[1] back to a[0] */ | |
739 sbiw r28, 4 | |
740 | |
741 /* add a[] to state and inc length */ | |
742 pop r27 | |
743 pop r26 /* now X points to state (and Y still at a[0]) */ | |
744 ldi tmp4, 5 | |
745 1: clc | |
746 ldi tmp3, 4 | |
747 2: ld tmp1, X | |
748 ld tmp2, Y+ | |
749 adc tmp1, tmp2 | |
750 st X+, tmp1 | |
751 dec tmp3 | |
752 brne 2b | |
753 dec tmp4 | |
754 brne 1b | |
755 | |
756 /* now length += 512 */ | |
757 adiw r26, 1 /* we skip the least significant byte */ | |
758 ld tmp1, X | |
759 ldi tmp2, hi8(512) /* 2 */ | |
760 add tmp1, tmp2 | |
761 st X+, tmp1 | |
762 ldi tmp2, 6 | |
763 1: | |
764 ld tmp1, X | |
765 adc tmp1, xNULL | |
766 st X+, tmp1 | |
767 dec tmp2 | |
768 brne 1b | |
769 | |
770 ; EPILOG | |
771 sha1_nextBlock_epilog: | |
772 /* now we should clean up the stack: the old SP was pushed right after the allocation */ | |
773 pop r21 | |
774 pop r20 | |
775 in r0, SREG | |
776 cli ; we want to be uninterrupted while updating SP | |
777 out SPL, r20 | |
778 out SREG, r0 | |
779 out SPH, r21 | |
780 | |
781 clr r1 | |
782 pop r29 | |
783 pop r28 | |
784 pop r17 | |
785 pop r16 | |
786 pop r15 | |
787 pop r14 | |
788 pop r13 | |
789 pop r12 | |
790 pop r11 | |
791 pop r10 | |
792 ret | |
793 | |
/*
 * Constant tables for sha1_nextBlock, read from program memory.
 * Their order and sizes are part of the code: the main loop steps from the
 * selected KTable entry straight into the JumpTable (KTable entry address
 * + 4 + 3*4), so every JumpTable slot has to be 4 bytes (rjmp + nop) and
 * must follow KTable directly.
 */

/* round numbers at which the next constant/function pair becomes active */
sha1_nextBlock_xTable:
        .byte   20,40,60,0

/* the four round constants k_t */
sha1_nextBlock_KTable:
        .int    0x5a827999
        .int    0x6ed9eba1
        .int    0x8f1bbcdc
        .int    0xca62c1d6

/* the four round functions f_t, in the same order as the constants */
sha1_nextBlock_JumpTable:
        rjmp    sha1_nextBlock_Ch
        nop
        rjmp    sha1_nextBlock_Parity
        nop
        rjmp    sha1_nextBlock_Maj
        nop
        rjmp    sha1_nextBlock_Parity
809 | |
/*
 * Ch, one byte at a time: tmp1 = (b & c) ^ (~b & d).
 * On the first call Y points at a[1] aka b (the caller copies X into Y).
 * The byte of b is read through Y with post-increment, so the matching
 * bytes of c and d are then found at Y+3 and Y+7.
 * Returns the result in tmp1; also changes tmp2 and tmp3.
 */
sha1_nextBlock_Ch:
        ld      tmp1, Y+                /* byte of b */
        mov     tmp2, tmp1
        com     tmp2                    /* tmp2 = ~b */
        ldd     tmp3, Y+3               /* byte of c */
        and     tmp1, tmp3              /* b & c */
        ldd     tmp3, Y+7               /* byte of d */
        and     tmp2, tmp3              /* ~b & d */
        eor     tmp1, tmp2
        ret
821 | |
/*
 * Maj, one byte at a time: tmp1 = (b & c) ^ (b & d) ^ (c & d).
 * Y is used exactly as in sha1_nextBlock_Ch: the byte of b is read with
 * post-increment, the bytes of c and d are at Y+3 and Y+7 afterwards.
 * Returns the result in tmp1; also changes tmp2, tmp3 and tmp4.
 */
sha1_nextBlock_Maj:
        ld      tmp1, Y+                /* byte of b */
        mov     tmp2, tmp1
        ldd     tmp3, Y+3               /* byte of c */
        and     tmp1, tmp3              /* b & c */
        ldd     tmp4, Y+7               /* byte of d */
        and     tmp2, tmp4              /* b & d */
        eor     tmp1, tmp2
        and     tmp3, tmp4              /* c & d */
        eor     tmp1, tmp3
        ret
833 | |
/*
 * Parity, one byte at a time: tmp1 = b ^ c ^ d.
 * Y is used exactly as in sha1_nextBlock_Ch: the byte of b is read with
 * post-increment, the bytes of c and d are at Y+3 and Y+7 afterwards.
 * Returns the result in tmp1; also changes tmp2.
 */
sha1_nextBlock_Parity:
        ld      tmp1, Y+                /* byte of b */
        ldd     tmp2, Y+3               /* byte of c */
        eor     tmp1, tmp2
        ldd     tmp2, Y+7               /* byte of d */
        eor     tmp1, tmp2
        ret
841 /* | |
842 ch_str: .asciz "\r\nCh" | |
843 maj_str: .asciz "\r\nMaj" | |
844 parity_str: .asciz "\r\nParity" | |
845 */ | |
846 ;########################################################### | |
847 | |
.global sha1_init
/*
 * === sha1_init ===
 * Assembler version of:
 *
 *   void sha1_init(sha1_ctx_t *state){
 *       state->h[0] = 0x67452301;
 *       state->h[1] = 0xefcdab89;
 *       state->h[2] = 0x98badcfe;
 *       state->h[3] = 0x10325476;
 *       state->h[4] = 0xc3d2e1f0;
 *       state->length = 0;
 *   }
 *
 *   param1: 16-bit pointer to the sha1_ctx_t struct in RAM, given in r25:r24
 *   modifies: r22, r23, X (r27:r26), Z (r31:r30)
 *
 * The 5*4 bytes of sha1_init_vector are copied from program memory, then the
 * 8 byte length field is filled from r1, which has to be zero on entry.
 */
sha1_init:
        movw    r26, r24                /* X = state */
        ldi     r30, lo8((sha1_init_vector))
        ldi     r31, hi8((sha1_init_vector))
        ldi     r22, 5*4                /* bytes to copy */
sha1_init_vloop:
        lpm     r23, Z+
        st      X+, r23
        dec     r22
        brne    sha1_init_vloop
        ldi     r22, 8                  /* size of the length field in bytes */
sha1_init_lloop:
        st      X+, r1
        dec     r22
        brne    sha1_init_lloop
        ret

/* initial values of h[0]..h[4] */
sha1_init_vector:
        .int    0x67452301
        .int    0xefcdab89
        .int    0x98badcfe
        .int    0x10325476
        .int    0xc3d2e1f0
883 |