annotate bn_fast_s_mp_sqr.c @ 2:86e0b50a9b58 libtommath-orig ltm-0.30-orig

ltm 0.30 orig import
author Matt Johnston <matt@ucc.asn.au>
date Mon, 31 May 2004 18:25:22 +0000
parents
children d29b64170cf0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
1 /* LibTomMath, multiple-precision integer library -- Tom St Denis
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
2 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
3 * LibTomMath is a library that provides multiple-precision
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
4 * integer arithmetic as well as number theoretic functionality.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
5 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
6 * The library was designed directly after the MPI library by
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
7 * Michael Fromberger but has been written from scratch with
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
8 * additional optimizations in place.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
9 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
10 * The library is free for all purposes without any express
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
11 * guarantee it works.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
12 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
13 * Tom St Denis, [email protected], http://math.libtomcrypt.org
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
14 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
15 #include <tommath.h>
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
16
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
17 /* fast squaring
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
18 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
19 * This is the comba method where the columns of the product
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
20 * are computed first then the carries are computed. This
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
21 * has the effect of making a very simple inner loop that
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
22 * is executed the most
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
23 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
24 * W2 represents the outer products and W the inner.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
25 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
26 * A further optimizations is made because the inner
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
27 * products are of the form "A * B * 2". The *2 part does
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
28 * not need to be computed until the end which is good
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
29 * because 64-bit shifts are slow!
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
30 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
31 * Based on Algorithm 14.16 on pp.597 of HAC.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
32 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
33 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
34 int fast_s_mp_sqr (mp_int * a, mp_int * b)
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
35 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
36 int olduse, newused, res, ix, pa;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
37 mp_word W2[MP_WARRAY], W[MP_WARRAY];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
38
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
39 /* calculate size of product and allocate as required */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
40 pa = a->used;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
41 newused = pa + pa + 1;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
42 if (b->alloc < newused) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
43 if ((res = mp_grow (b, newused)) != MP_OKAY) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
44 return res;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
45 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
46 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
47
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
48 /* zero temp buffer (columns)
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
49 * Note that there are two buffers. Since squaring requires
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
50 * a outer and inner product and the inner product requires
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
51 * computing a product and doubling it (a relatively expensive
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
52 * op to perform n**2 times if you don't have to) the inner and
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
53 * outer products are computed in different buffers. This way
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
54 * the inner product can be doubled using n doublings instead of
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
55 * n**2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
56 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
57 memset (W, 0, newused * sizeof (mp_word));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
58 memset (W2, 0, newused * sizeof (mp_word));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
59
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
60 /* This computes the inner product. To simplify the inner N**2 loop
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
61 * the multiplication by two is done afterwards in the N loop.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
62 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
63 for (ix = 0; ix < pa; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
64 /* compute the outer product
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
65 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
66 * Note that every outer product is computed
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
67 * for a particular column only once which means that
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
68 * there is no need todo a double precision addition
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
69 * into the W2[] array.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
70 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
71 W2[ix + ix] = ((mp_word)a->dp[ix]) * ((mp_word)a->dp[ix]);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
72
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
73 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
74 register mp_digit tmpx, *tmpy;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
75 register mp_word *_W;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
76 register int iy;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
77
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
78 /* copy of left side */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
79 tmpx = a->dp[ix];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
80
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
81 /* alias for right side */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
82 tmpy = a->dp + (ix + 1);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
83
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
84 /* the column to store the result in */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
85 _W = W + (ix + ix + 1);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
86
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
87 /* inner products */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
88 for (iy = ix + 1; iy < pa; iy++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
89 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
90 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
91 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
92 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
93
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
94 /* setup dest */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
95 olduse = b->used;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
96 b->used = newused;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
97
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
98 /* now compute digits
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
99 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
100 * We have to double the inner product sums, add in the
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
101 * outer product sums, propagate carries and convert
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
102 * to single precision.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
103 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
104 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
105 register mp_digit *tmpb;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
106
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
107 /* double first value, since the inner products are
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
108 * half of what they should be
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
109 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
110 W[0] += W[0] + W2[0];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
111
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
112 tmpb = b->dp;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
113 for (ix = 1; ix < newused; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
114 /* double/add next digit */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
115 W[ix] += W[ix] + W2[ix];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
116
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
117 /* propagate carry forwards [from the previous digit] */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
118 W[ix] = W[ix] + (W[ix - 1] >> ((mp_word) DIGIT_BIT));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
119
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
120 /* store the current digit now that the carry isn't
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
121 * needed
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
122 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
123 *tmpb++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
124 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
125 /* set the last value. Note even if the carry is zero
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
126 * this is required since the next step will not zero
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
127 * it if b originally had a value at b->dp[2*a.used]
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
128 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
129 *tmpb++ = (mp_digit) (W[(newused) - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
130
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
131 /* clear high digits of b if there were any originally */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
132 for (; ix < olduse; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
133 *tmpb++ = 0;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
134 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
135 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
136
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
137 mp_clamp (b);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
138 return MP_OKAY;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
139 }