annotate bn_fast_s_mp_sqr.c @ 142:d29b64170cf0 libtommath-orig

import of libtommath 0.32
author Matt Johnston <matt@ucc.asn.au>
date Sun, 19 Dec 2004 11:33:56 +0000
parents 86e0b50a9b58
children d8254fc979e9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
1 #include <tommath.h>
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
2 #ifdef BN_FAST_S_MP_SQR_C
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
4 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
5 * LibTomMath is a library that provides multiple-precision
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
6 * integer arithmetic as well as number theoretic functionality.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
7 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
8 * The library was designed directly after the MPI library by
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
9 * Michael Fromberger but has been written from scratch with
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
10 * additional optimizations in place.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
11 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
12 * The library is free for all purposes without any express
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
13 * guarantee it works.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
14 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
16 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
17
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
18 /* fast squaring
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
19 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
20 * This is the comba method where the columns of the product
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
21 * are computed first then the carries are computed. This
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
22 * has the effect of making a very simple inner loop that
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
23 * is executed the most
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
24 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
25 * W2 represents the outer products and W the inner.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
26 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
27 * A further optimizations is made because the inner
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
28 * products are of the form "A * B * 2". The *2 part does
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
29 * not need to be computed until the end which is good
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
30 * because 64-bit shifts are slow!
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
31 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
32 * Based on Algorithm 14.16 on pp.597 of HAC.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
33 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
34 */
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
35 /* the jist of squaring...
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
36
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
37 you do like mult except the offset of the tmpx [one that starts closer to zero]
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
38 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
39 (ty-tx) so that it never happens. You double all those you add in the inner loop
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
40
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
41 After that loop you do the squares and add them in.
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
42
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
43 Remove W2 and don't memset W
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
44
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
45 */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
46
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
47 int fast_s_mp_sqr (mp_int * a, mp_int * b)
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
48 {
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
49 int olduse, res, pa, ix, iz;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
50 mp_digit W[MP_WARRAY], *tmpx;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
51 mp_word W1;
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
52
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
53 /* grow the destination as required */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
54 pa = a->used + a->used;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
55 if (b->alloc < pa) {
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
56 if ((res = mp_grow (b, pa)) != MP_OKAY) {
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
57 return res;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
58 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
59 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
60
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
61 /* number of output digits to produce */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
62 W1 = 0;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
63 for (ix = 0; ix <= pa; ix++) {
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
64 int tx, ty, iy;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
65 mp_word _W;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
66 mp_digit *tmpy;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
67
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
68 /* clear counter */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
69 _W = 0;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
70
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
71 /* get offsets into the two bignums */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
72 ty = MIN(a->used-1, ix);
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
73 tx = ix - ty;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
74
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
75 /* setup temp aliases */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
76 tmpx = a->dp + tx;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
77 tmpy = a->dp + ty;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
78
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
79 /* this is the number of times the loop will iterrate, essentially its
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
80 while (tx++ < a->used && ty-- >= 0) { ... }
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
81 */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
82 iy = MIN(a->used-tx, ty+1);
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
83
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
84 /* now for squaring tx can never equal ty
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
85 * we halve the distance since they approach at a rate of 2x
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
86 * and we have to round because odd cases need to be executed
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
87 */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
88 iy = MIN(iy, (ty-tx+1)>>1);
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
89
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
90 /* execute loop */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
91 for (iz = 0; iz < iy; iz++) {
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
92 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
93 }
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
94
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
95 /* double the inner product and add carry */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
96 _W = _W + _W + W1;
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
97
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
98 /* even columns have the square term in them */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
99 if ((ix&1) == 0) {
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
100 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
101 }
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
102
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
103 /* store it */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
104 W[ix] = _W;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
105
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
106 /* make next carry */
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
107 W1 = _W >> ((mp_word)DIGIT_BIT);
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
108 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
109
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
110 /* setup dest */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
111 olduse = b->used;
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
112 b->used = a->used+a->used;
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
113
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
114 {
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
115 mp_digit *tmpb;
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
116 tmpb = b->dp;
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
117 for (ix = 0; ix < pa; ix++) {
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
118 *tmpb++ = W[ix] & MP_MASK;
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
119 }
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
120
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
121 /* clear unused digits [that existed in the old copy of c] */
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
122 for (; ix < olduse; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
123 *tmpb++ = 0;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
124 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
125 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
126 mp_clamp (b);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
127 return MP_OKAY;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
128 }
142
d29b64170cf0 import of libtommath 0.32
Matt Johnston <matt@ucc.asn.au>
parents: 2
diff changeset
129 #endif