comparison libtommath/bn_mp_toom_mul.c @ 1436:60fc6476e044

Update to libtommath v1.0
author Matt Johnston <matt@ucc.asn.au>
date Sat, 24 Jun 2017 22:37:14 +0800
parents 5ff8218bcee9
children 8bba51a55704
comparison
equal deleted inserted replaced
1435:f849a5ca2efc 1436:60fc6476e044
1 #include <tommath.h> 1 #include <tommath_private.h>
2 #ifdef BN_MP_TOOM_MUL_C 2 #ifdef BN_MP_TOOM_MUL_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis
4 * 4 *
5 * LibTomMath is a library that provides multiple-precision 5 * LibTomMath is a library that provides multiple-precision
6 * integer arithmetic as well as number theoretic functionality. 6 * integer arithmetic as well as number theoretic functionality.
10 * additional optimizations in place. 10 * additional optimizations in place.
11 * 11 *
12 * The library is free for all purposes without any express 12 * The library is free for all purposes without any express
13 * guarantee it works. 13 * guarantee it works.
14 * 14 *
15 * Tom St Denis, [email protected], http://math.libtomcrypt.com 15 * Tom St Denis, [email protected], http://libtom.org
16 */ 16 */
17 17
18 /* multiplication using the Toom-Cook 3-way algorithm 18 /* multiplication using the Toom-Cook 3-way algorithm
19 * 19 *
20 * Much more complicated than Karatsuba but has a lower 20 * Much more complicated than Karatsuba but has a lower
21 * asymptotic running time of O(N**1.464). This algorithm is 21 * asymptotic running time of O(N**1.464). This algorithm is
22 * only particularly useful on VERY large inputs 22 * only particularly useful on VERY large inputs
23 * (we're talking 1000s of digits here...). 23 * (we're talking 1000s of digits here...).
24 */ 24 */
25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
26 { 26 {
27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
28 int res, B; 28 int res, B;
29 29
30 /* init temps */ 30 /* init temps */
31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
32 &a0, &a1, &a2, &b0, &b1, 32 &a0, &a1, &a2, &b0, &b1,
33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
34 return res; 34 return res;
35 } 35 }
36 36
37 /* B */ 37 /* B */
38 B = MIN(a->used, b->used) / 3; 38 B = MIN(a->used, b->used) / 3;
39 39
40 /* a = a2 * B**2 + a1 * B + a0 */ 40 /* a = a2 * B**2 + a1 * B + a0 */
41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
42 goto ERR; 42 goto ERR;
43 } 43 }
44 44
45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 45 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
46 goto ERR; 46 goto ERR;
47 } 47 }
48 mp_rshd(&a1, B); 48 mp_rshd(&a1, B);
49 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 49 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
50 goto ERR;
51 }
50 52
51 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 53 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
52 goto ERR; 54 goto ERR;
53 } 55 }
54 mp_rshd(&a2, B*2); 56 mp_rshd(&a2, B*2);
55 57
56 /* b = b2 * B**2 + b1 * B + b0 */ 58 /* b = b2 * B**2 + b1 * B + b0 */
57 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
58 goto ERR; 60 goto ERR;
59 } 61 }
60 62
61 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 63 if ((res = mp_copy(b, &b1)) != MP_OKAY) {
62 goto ERR; 64 goto ERR;
63 } 65 }
64 mp_rshd(&b1, B); 66 mp_rshd(&b1, B);
65 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 67 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
66 68
67 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 69 if ((res = mp_copy(b, &b2)) != MP_OKAY) {
68 goto ERR; 70 goto ERR;
69 } 71 }
70 mp_rshd(&b2, B*2); 72 mp_rshd(&b2, B*2);
71 73
72 /* w0 = a0*b0 */ 74 /* w0 = a0*b0 */
73 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
74 goto ERR; 76 goto ERR;
75 } 77 }
76 78
77 /* w4 = a2 * b2 */ 79 /* w4 = a2 * b2 */
78 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
79 goto ERR; 81 goto ERR;
80 } 82 }
81 83
82 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
83 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
84 goto ERR; 86 goto ERR;
85 } 87 }
86 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
90 goto ERR; 92 goto ERR;
91 } 93 }
92 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
93 goto ERR; 95 goto ERR;
94 } 96 }
95 97
96 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
97 goto ERR; 99 goto ERR;
98 } 100 }
99 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
100 goto ERR; 102 goto ERR;
103 goto ERR; 105 goto ERR;
104 } 106 }
105 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
106 goto ERR; 108 goto ERR;
107 } 109 }
108 110
109 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
110 goto ERR; 112 goto ERR;
111 } 113 }
112 114
113 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
114 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
115 goto ERR; 117 goto ERR;
116 } 118 }
117 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
121 goto ERR; 123 goto ERR;
122 } 124 }
123 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
124 goto ERR; 126 goto ERR;
125 } 127 }
126 128
127 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
128 goto ERR; 130 goto ERR;
129 } 131 }
130 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
131 goto ERR; 133 goto ERR;
134 goto ERR; 136 goto ERR;
135 } 137 }
136 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
137 goto ERR; 139 goto ERR;
138 } 140 }
139 141
140 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
141 goto ERR; 143 goto ERR;
142 } 144 }
143 145
144 146
145 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
146 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
147 goto ERR; 149 goto ERR;
148 } 150 }
156 goto ERR; 158 goto ERR;
157 } 159 }
158 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
159 goto ERR; 161 goto ERR;
160 } 162 }
161 163
162 /* now solve the matrix 164 /* now solve the matrix
163 165
164 0 0 0 0 1 166 0 0 0 0 1
165 1 2 4 8 16 167 1 2 4 8 16
166 1 1 1 1 1 168 1 1 1 1 1
167 16 8 4 2 1 169 16 8 4 2 1
168 1 0 0 0 0 170 1 0 0 0 0
169 171
170 using 12 subtractions, 4 shifts, 172 using 12 subtractions, 4 shifts,
171 2 small divisions and 1 small multiplication 173 2 small divisions and 1 small multiplication
172 */ 174 */
173 175
174 /* r1 - r4 */ 176 /* r1 - r4 */
175 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
176 goto ERR; 178 goto ERR;
177 } 179 }
178 /* r3 - r0 */ 180 /* r3 - r0 */
179 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
180 goto ERR; 182 goto ERR;
181 } 183 }
182 /* r1/2 */ 184 /* r1/2 */
183 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
184 goto ERR; 186 goto ERR;
185 } 187 }
186 /* r3/2 */ 188 /* r3/2 */
187 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
188 goto ERR; 190 goto ERR;
189 } 191 }
190 /* r2 - r0 - r4 */ 192 /* r2 - r0 - r4 */
191 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
192 goto ERR; 194 goto ERR;
193 } 195 }
194 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
195 goto ERR; 197 goto ERR;
196 } 198 }
197 /* r1 - r2 */ 199 /* r1 - r2 */
198 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
199 goto ERR; 201 goto ERR;
200 } 202 }
201 /* r3 - r2 */ 203 /* r3 - r2 */
202 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
203 goto ERR; 205 goto ERR;
204 } 206 }
205 /* r1 - 8r0 */ 207 /* r1 - 8r0 */
206 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
207 goto ERR; 209 goto ERR;
208 } 210 }
209 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
210 goto ERR; 212 goto ERR;
211 } 213 }
212 /* r3 - 8r4 */ 214 /* r3 - 8r4 */
213 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
214 goto ERR; 216 goto ERR;
215 } 217 }
216 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
217 goto ERR; 219 goto ERR;
218 } 220 }
219 /* 3r2 - r1 - r3 */ 221 /* 3r2 - r1 - r3 */
220 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
221 goto ERR; 223 goto ERR;
222 } 224 }
223 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
224 goto ERR; 226 goto ERR;
225 } 227 }
226 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
227 goto ERR; 229 goto ERR;
228 } 230 }
229 /* r1 - r2 */ 231 /* r1 - r2 */
230 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
231 goto ERR; 233 goto ERR;
232 } 234 }
233 /* r3 - r2 */ 235 /* r3 - r2 */
234 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
235 goto ERR; 237 goto ERR;
236 } 238 }
237 /* r1/3 */ 239 /* r1/3 */
238 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
239 goto ERR; 241 goto ERR;
240 } 242 }
241 /* r3/3 */ 243 /* r3/3 */
242 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
243 goto ERR; 245 goto ERR;
244 } 246 }
245 247
246 /* at this point shift W[n] by B*n */ 248 /* at this point shift W[n] by B*n */
247 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
248 goto ERR; 250 goto ERR;
249 } 251 }
250 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
251 goto ERR; 253 goto ERR;
252 } 254 }
253 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
254 goto ERR; 256 goto ERR;
255 } 257 }
256 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
257 goto ERR; 259 goto ERR;
258 } 260 }
259 261
260 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
261 goto ERR; 263 goto ERR;
262 } 264 }
263 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
264 goto ERR; 266 goto ERR;
265 } 267 }
266 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
267 goto ERR; 269 goto ERR;
268 } 270 }
269 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
270 goto ERR; 272 goto ERR;
271 } 273 }
272 274
273 ERR: 275 ERR:
274 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
275 &a0, &a1, &a2, &b0, &b1, 277 &a0, &a1, &a2, &b0, &b1,
276 &b2, &tmp1, &tmp2, NULL); 278 &b2, &tmp1, &tmp2, NULL);
277 return res; 279 return res;
278 } 280 }
279 281
280 #endif 282 #endif
281 283
282 /* $Source: /cvs/libtom/libtommath/bn_mp_toom_mul.c,v $ */ 284 /* $Source$ */
283 /* $Revision: 1.3 $ */ 285 /* $Revision$ */
284 /* $Date: 2006/03/31 14:18:44 $ */ 286 /* $Date$ */