Mercurial > dropbear
comparison libtommath/bn_mp_toom_mul.c @ 1439:8d24733026c5 coverity
merge
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Sat, 24 Jun 2017 23:33:16 +0800 |
parents | 60fc6476e044 |
children | 8bba51a55704 |
comparison
equal
deleted
inserted
replaced
1400:238a439670f5 | 1439:8d24733026c5 |
---|---|
1 #include <tommath.h> | 1 #include <tommath_private.h> |
2 #ifdef BN_MP_TOOM_MUL_C | 2 #ifdef BN_MP_TOOM_MUL_C |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
4 * | 4 * |
5 * LibTomMath is a library that provides multiple-precision | 5 * LibTomMath is a library that provides multiple-precision |
6 * integer arithmetic as well as number theoretic functionality. | 6 * integer arithmetic as well as number theoretic functionality. |
10 * additional optimizations in place. | 10 * additional optimizations in place. |
11 * | 11 * |
12 * The library is free for all purposes without any express | 12 * The library is free for all purposes without any express |
13 * guarantee it works. | 13 * guarantee it works. |
14 * | 14 * |
15 * Tom St Denis, [email protected], http://math.libtomcrypt.com | 15 * Tom St Denis, [email protected], http://libtom.org |
16 */ | 16 */ |
17 | 17 |
18 /* multiplication using the Toom-Cook 3-way algorithm | 18 /* multiplication using the Toom-Cook 3-way algorithm |
19 * | 19 * |
20 * Much more complicated than Karatsuba but has a lower | 20 * Much more complicated than Karatsuba but has a lower |
21 * asymptotic running time of O(N**1.464). This algorithm is | 21 * asymptotic running time of O(N**1.464). This algorithm is |
22 * only particularly useful on VERY large inputs | 22 * only particularly useful on VERY large inputs |
23 * (we're talking 1000s of digits here...). | 23 * (we're talking 1000s of digits here...). |
24 */ | 24 */ |
25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) | 25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) |
26 { | 26 { |
27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; | 27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; |
28 int res, B; | 28 int res, B; |
29 | 29 |
30 /* init temps */ | 30 /* init temps */ |
31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, | 31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, |
32 &a0, &a1, &a2, &b0, &b1, | 32 &a0, &a1, &a2, &b0, &b1, |
33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { | 33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { |
34 return res; | 34 return res; |
35 } | 35 } |
36 | 36 |
37 /* B */ | 37 /* B */ |
38 B = MIN(a->used, b->used) / 3; | 38 B = MIN(a->used, b->used) / 3; |
39 | 39 |
40 /* a = a2 * B**2 + a1 * B + a0 */ | 40 /* a = a2 * B**2 + a1 * B + a0 */ |
41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { | 41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { |
42 goto ERR; | 42 goto ERR; |
43 } | 43 } |
44 | 44 |
45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { | 45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { |
46 goto ERR; | 46 goto ERR; |
47 } | 47 } |
48 mp_rshd(&a1, B); | 48 mp_rshd(&a1, B); |
49 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); | 49 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { |
50 goto ERR; | |
51 } | |
50 | 52 |
51 if ((res = mp_copy(a, &a2)) != MP_OKAY) { | 53 if ((res = mp_copy(a, &a2)) != MP_OKAY) { |
52 goto ERR; | 54 goto ERR; |
53 } | 55 } |
54 mp_rshd(&a2, B*2); | 56 mp_rshd(&a2, B*2); |
55 | 57 |
56 /* b = b2 * B**2 + b1 * B + b0 */ | 58 /* b = b2 * B**2 + b1 * B + b0 */ |
57 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { | 59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { |
58 goto ERR; | 60 goto ERR; |
59 } | 61 } |
60 | 62 |
61 if ((res = mp_copy(b, &b1)) != MP_OKAY) { | 63 if ((res = mp_copy(b, &b1)) != MP_OKAY) { |
62 goto ERR; | 64 goto ERR; |
63 } | 65 } |
64 mp_rshd(&b1, B); | 66 mp_rshd(&b1, B); |
65 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); | 67 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1); |
66 | 68 |
67 if ((res = mp_copy(b, &b2)) != MP_OKAY) { | 69 if ((res = mp_copy(b, &b2)) != MP_OKAY) { |
68 goto ERR; | 70 goto ERR; |
69 } | 71 } |
70 mp_rshd(&b2, B*2); | 72 mp_rshd(&b2, B*2); |
71 | 73 |
72 /* w0 = a0*b0 */ | 74 /* w0 = a0*b0 */ |
73 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { | 75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { |
74 goto ERR; | 76 goto ERR; |
75 } | 77 } |
76 | 78 |
77 /* w4 = a2 * b2 */ | 79 /* w4 = a2 * b2 */ |
78 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { | 80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { |
79 goto ERR; | 81 goto ERR; |
80 } | 82 } |
81 | 83 |
82 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ | 84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ |
83 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { | 85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { |
84 goto ERR; | 86 goto ERR; |
85 } | 87 } |
86 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | 88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { |
90 goto ERR; | 92 goto ERR; |
91 } | 93 } |
92 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { | 94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { |
93 goto ERR; | 95 goto ERR; |
94 } | 96 } |
95 | 97 |
96 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { | 98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { |
97 goto ERR; | 99 goto ERR; |
98 } | 100 } |
99 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | 101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { |
100 goto ERR; | 102 goto ERR; |
103 goto ERR; | 105 goto ERR; |
104 } | 106 } |
105 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { | 107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { |
106 goto ERR; | 108 goto ERR; |
107 } | 109 } |
108 | 110 |
109 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { | 111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { |
110 goto ERR; | 112 goto ERR; |
111 } | 113 } |
112 | 114 |
113 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ | 115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ |
114 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { | 116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { |
115 goto ERR; | 117 goto ERR; |
116 } | 118 } |
117 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | 119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { |
121 goto ERR; | 123 goto ERR; |
122 } | 124 } |
123 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { | 125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { |
124 goto ERR; | 126 goto ERR; |
125 } | 127 } |
126 | 128 |
127 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { | 129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { |
128 goto ERR; | 130 goto ERR; |
129 } | 131 } |
130 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | 132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { |
131 goto ERR; | 133 goto ERR; |
134 goto ERR; | 136 goto ERR; |
135 } | 137 } |
136 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { | 138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { |
137 goto ERR; | 139 goto ERR; |
138 } | 140 } |
139 | 141 |
140 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { | 142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { |
141 goto ERR; | 143 goto ERR; |
142 } | 144 } |
143 | 145 |
144 | 146 |
145 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ | 147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ |
146 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { | 148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { |
147 goto ERR; | 149 goto ERR; |
148 } | 150 } |
156 goto ERR; | 158 goto ERR; |
157 } | 159 } |
158 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { | 160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { |
159 goto ERR; | 161 goto ERR; |
160 } | 162 } |
161 | 163 |
162 /* now solve the matrix | 164 /* now solve the matrix |
163 | 165 |
164 0 0 0 0 1 | 166 0 0 0 0 1 |
165 1 2 4 8 16 | 167 1 2 4 8 16 |
166 1 1 1 1 1 | 168 1 1 1 1 1 |
167 16 8 4 2 1 | 169 16 8 4 2 1 |
168 1 0 0 0 0 | 170 1 0 0 0 0 |
169 | 171 |
170 using 12 subtractions, 4 shifts, | 172 using 12 subtractions, 4 shifts, |
171 2 small divisions and 1 small multiplication | 173 2 small divisions and 1 small multiplication |
172 */ | 174 */ |
173 | 175 |
174 /* r1 - r4 */ | 176 /* r1 - r4 */ |
175 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { | 177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { |
176 goto ERR; | 178 goto ERR; |
177 } | 179 } |
178 /* r3 - r0 */ | 180 /* r3 - r0 */ |
179 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { | 181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { |
180 goto ERR; | 182 goto ERR; |
181 } | 183 } |
182 /* r1/2 */ | 184 /* r1/2 */ |
183 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { | 185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { |
184 goto ERR; | 186 goto ERR; |
185 } | 187 } |
186 /* r3/2 */ | 188 /* r3/2 */ |
187 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { | 189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { |
188 goto ERR; | 190 goto ERR; |
189 } | 191 } |
190 /* r2 - r0 - r4 */ | 192 /* r2 - r0 - r4 */ |
191 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { | 193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { |
192 goto ERR; | 194 goto ERR; |
193 } | 195 } |
194 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { | 196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { |
195 goto ERR; | 197 goto ERR; |
196 } | 198 } |
197 /* r1 - r2 */ | 199 /* r1 - r2 */ |
198 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | 200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { |
199 goto ERR; | 201 goto ERR; |
200 } | 202 } |
201 /* r3 - r2 */ | 203 /* r3 - r2 */ |
202 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | 204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { |
203 goto ERR; | 205 goto ERR; |
204 } | 206 } |
205 /* r1 - 8r0 */ | 207 /* r1 - 8r0 */ |
206 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { | 208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { |
207 goto ERR; | 209 goto ERR; |
208 } | 210 } |
209 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { | 211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { |
210 goto ERR; | 212 goto ERR; |
211 } | 213 } |
212 /* r3 - 8r4 */ | 214 /* r3 - 8r4 */ |
213 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { | 215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { |
214 goto ERR; | 216 goto ERR; |
215 } | 217 } |
216 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { | 218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { |
217 goto ERR; | 219 goto ERR; |
218 } | 220 } |
219 /* 3r2 - r1 - r3 */ | 221 /* 3r2 - r1 - r3 */ |
220 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { | 222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { |
221 goto ERR; | 223 goto ERR; |
222 } | 224 } |
223 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { | 225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { |
224 goto ERR; | 226 goto ERR; |
225 } | 227 } |
226 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { | 228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { |
227 goto ERR; | 229 goto ERR; |
228 } | 230 } |
229 /* r1 - r2 */ | 231 /* r1 - r2 */ |
230 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | 232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { |
231 goto ERR; | 233 goto ERR; |
232 } | 234 } |
233 /* r3 - r2 */ | 235 /* r3 - r2 */ |
234 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | 236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { |
235 goto ERR; | 237 goto ERR; |
236 } | 238 } |
237 /* r1/3 */ | 239 /* r1/3 */ |
238 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { | 240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { |
239 goto ERR; | 241 goto ERR; |
240 } | 242 } |
241 /* r3/3 */ | 243 /* r3/3 */ |
242 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { | 244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { |
243 goto ERR; | 245 goto ERR; |
244 } | 246 } |
245 | 247 |
246 /* at this point shift W[n] by B*n */ | 248 /* at this point shift W[n] by B*n */ |
247 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { | 249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { |
248 goto ERR; | 250 goto ERR; |
249 } | 251 } |
250 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { | 252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { |
251 goto ERR; | 253 goto ERR; |
252 } | 254 } |
253 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { | 255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { |
254 goto ERR; | 256 goto ERR; |
255 } | 257 } |
256 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { | 258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { |
257 goto ERR; | 259 goto ERR; |
258 } | 260 } |
259 | 261 |
260 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { | 262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { |
261 goto ERR; | 263 goto ERR; |
262 } | 264 } |
263 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { | 265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { |
264 goto ERR; | 266 goto ERR; |
265 } | 267 } |
266 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { | 268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { |
267 goto ERR; | 269 goto ERR; |
268 } | 270 } |
269 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { | 271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { |
270 goto ERR; | 272 goto ERR; |
271 } | 273 } |
272 | 274 |
273 ERR: | 275 ERR: |
274 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, | 276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, |
275 &a0, &a1, &a2, &b0, &b1, | 277 &a0, &a1, &a2, &b0, &b1, |
276 &b2, &tmp1, &tmp2, NULL); | 278 &b2, &tmp1, &tmp2, NULL); |
277 return res; | 279 return res; |
278 } | 280 } |
279 | 281 |
280 #endif | 282 #endif |
281 | 283 |
282 /* $Source: /cvs/libtom/libtommath/bn_mp_toom_mul.c,v $ */ | 284 /* $Source$ */ |
283 /* $Revision: 1.3 $ */ | 285 /* $Revision$ */ |
284 /* $Date: 2006/03/31 14:18:44 $ */ | 286 /* $Date$ */ |