Mercurial > dropbear
comparison bn_fast_s_mp_sqr.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35
Initial import of libtommath 0.35
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Fri, 06 May 2005 08:59:30 +0000 |
parents | d29b64170cf0 |
children |
comparison
equal
deleted
inserted
replaced
142:d29b64170cf0 | 190:d8254fc979e9 |
---|---|
13 * guarantee it works. | 13 * guarantee it works. |
14 * | 14 * |
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org | 15 * Tom St Denis, [email protected], http://math.libtomcrypt.org |
16 */ | 16 */ |
17 | 17 |
18 /* fast squaring | |
19 * | |
20 * This is the comba method where the columns of the product | |
21 * are computed first then the carries are computed. This | |
22 * has the effect of making a very simple inner loop that | |
23 * is executed the most | |
24 * | |
25 * W2 represents the outer products and W the inner. | |
26 * | |
27 * A further optimizations is made because the inner | |
28 * products are of the form "A * B * 2". The *2 part does | |
29 * not need to be computed until the end which is good | |
30 * because 64-bit shifts are slow! | |
31 * | |
32 * Based on Algorithm 14.16 on pp.597 of HAC. | |
33 * | |
34 */ | |
35 /* the jist of squaring... | 18 /* the jist of squaring... |
36 | 19 * you do like mult except the offset of the tmpx [one that |
37 you do like mult except the offset of the tmpx [one that starts closer to zero] | 20 * starts closer to zero] can't equal the offset of tmpy. |
38 can't equal the offset of tmpy. So basically you set up iy like before then you min it with | 21 * So basically you set up iy like before then you min it with |
39 (ty-tx) so that it never happens. You double all those you add in the inner loop | 22 * (ty-tx) so that it never happens. You double all those |
23 * you add in the inner loop | |
40 | 24 |
41 After that loop you do the squares and add them in. | 25 After that loop you do the squares and add them in. |
42 | |
43 Remove W2 and don't memset W | |
44 | |
45 */ | 26 */ |
46 | 27 |
47 int fast_s_mp_sqr (mp_int * a, mp_int * b) | 28 int fast_s_mp_sqr (mp_int * a, mp_int * b) |
48 { | 29 { |
49 int olduse, res, pa, ix, iz; | 30 int olduse, res, pa, ix, iz; |
58 } | 39 } |
59 } | 40 } |
60 | 41 |
61 /* number of output digits to produce */ | 42 /* number of output digits to produce */ |
62 W1 = 0; | 43 W1 = 0; |
63 for (ix = 0; ix <= pa; ix++) { | 44 for (ix = 0; ix < pa; ix++) { |
64 int tx, ty, iy; | 45 int tx, ty, iy; |
65 mp_word _W; | 46 mp_word _W; |
66 mp_digit *tmpy; | 47 mp_digit *tmpy; |
67 | 48 |
68 /* clear counter */ | 49 /* clear counter */ |
74 | 55 |
75 /* setup temp aliases */ | 56 /* setup temp aliases */ |
76 tmpx = a->dp + tx; | 57 tmpx = a->dp + tx; |
77 tmpy = a->dp + ty; | 58 tmpy = a->dp + ty; |
78 | 59 |
79 /* this is the number of times the loop will iterrate, essentially its | 60 /* this is the number of times the loop will iterrate, essentially |
80 while (tx++ < a->used && ty-- >= 0) { ... } | 61 while (tx++ < a->used && ty-- >= 0) { ... } |
81 */ | 62 */ |
82 iy = MIN(a->used-tx, ty+1); | 63 iy = MIN(a->used-tx, ty+1); |
83 | 64 |
84 /* now for squaring tx can never equal ty | 65 /* now for squaring tx can never equal ty |
99 if ((ix&1) == 0) { | 80 if ((ix&1) == 0) { |
100 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); | 81 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); |
101 } | 82 } |
102 | 83 |
103 /* store it */ | 84 /* store it */ |
104 W[ix] = _W; | 85 W[ix] = (mp_digit)(_W & MP_MASK); |
105 | 86 |
106 /* make next carry */ | 87 /* make next carry */ |
107 W1 = _W >> ((mp_word)DIGIT_BIT); | 88 W1 = _W >> ((mp_word)DIGIT_BIT); |
108 } | 89 } |
109 | 90 |