comparison bn_fast_s_mp_sqr.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35

Initial import of libtommath 0.35
author Matt Johnston <matt@ucc.asn.au>
date Fri, 06 May 2005 08:59:30 +0000
parents d29b64170cf0
children
comparison
equal deleted inserted replaced
142:d29b64170cf0 190:d8254fc979e9
13 * guarantee it works. 13 * guarantee it works.
14 * 14 *
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org 15 * Tom St Denis, [email protected], http://math.libtomcrypt.org
16 */ 16 */
17 17
18 /* fast squaring
19 *
20 * This is the comba method where the columns of the product
21 * are computed first then the carries are computed. This
22 * has the effect of making a very simple inner loop that
23 * is executed the most
24 *
25 * W2 represents the outer products and W the inner.
26 *
27 * A further optimizations is made because the inner
28 * products are of the form "A * B * 2". The *2 part does
29 * not need to be computed until the end which is good
30 * because 64-bit shifts are slow!
31 *
32 * Based on Algorithm 14.16 on pp.597 of HAC.
33 *
34 */
35 /* the jist of squaring... 18 /* the jist of squaring...
36 19 * you do like mult except the offset of the tmpx [one that
37 you do like mult except the offset of the tmpx [one that starts closer to zero] 20 * starts closer to zero] can't equal the offset of tmpy.
38 can't equal the offset of tmpy. So basically you set up iy like before then you min it with 21 * So basically you set up iy like before then you min it with
39 (ty-tx) so that it never happens. You double all those you add in the inner loop 22 * (ty-tx) so that it never happens. You double all those
23 * you add in the inner loop
40 24
41 After that loop you do the squares and add them in. 25 After that loop you do the squares and add them in.
42
43 Remove W2 and don't memset W
44
45 */ 26 */
46 27
47 int fast_s_mp_sqr (mp_int * a, mp_int * b) 28 int fast_s_mp_sqr (mp_int * a, mp_int * b)
48 { 29 {
49 int olduse, res, pa, ix, iz; 30 int olduse, res, pa, ix, iz;
58 } 39 }
59 } 40 }
60 41
61 /* number of output digits to produce */ 42 /* number of output digits to produce */
62 W1 = 0; 43 W1 = 0;
63 for (ix = 0; ix <= pa; ix++) { 44 for (ix = 0; ix < pa; ix++) {
64 int tx, ty, iy; 45 int tx, ty, iy;
65 mp_word _W; 46 mp_word _W;
66 mp_digit *tmpy; 47 mp_digit *tmpy;
67 48
68 /* clear counter */ 49 /* clear counter */
74 55
75 /* setup temp aliases */ 56 /* setup temp aliases */
76 tmpx = a->dp + tx; 57 tmpx = a->dp + tx;
77 tmpy = a->dp + ty; 58 tmpy = a->dp + ty;
78 59
79 /* this is the number of times the loop will iterrate, essentially its 60 /* this is the number of times the loop will iterrate, essentially
80 while (tx++ < a->used && ty-- >= 0) { ... } 61 while (tx++ < a->used && ty-- >= 0) { ... }
81 */ 62 */
82 iy = MIN(a->used-tx, ty+1); 63 iy = MIN(a->used-tx, ty+1);
83 64
84 /* now for squaring tx can never equal ty 65 /* now for squaring tx can never equal ty
99 if ((ix&1) == 0) { 80 if ((ix&1) == 0) {
100 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 81 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
101 } 82 }
102 83
103 /* store it */ 84 /* store it */
104 W[ix] = _W; 85 W[ix] = (mp_digit)(_W & MP_MASK);
105 86
106 /* make next carry */ 87 /* make next carry */
107 W1 = _W >> ((mp_word)DIGIT_BIT); 88 W1 = _W >> ((mp_word)DIGIT_BIT);
108 } 89 }
109 90