comparison libtommath/bn_mp_toom_mul.c @ 1655:f52919ffd3b1

update ltm to 1.1.0 and enable FIPS 186.4 compliant key-generation (#79) * make key-generation compliant to FIPS 186.4 * fix includes in tommath_class.h * update fuzzcorpus instead of error-out * fixup fuzzing make-targets * update Makefile.in * apply necessary patches to ltm sources * clean-up not required ltm files * update to vanilla ltm 1.1.0 this already only contains the required files * remove set/get double
author Steffen Jaeckel <s_jaeckel@gmx.de>
date Mon, 16 Sep 2019 15:50:38 +0200
parents 8bba51a55704
children
comparison
equal deleted inserted replaced
1654:cc0fc5131c5c 1655:f52919ffd3b1
1 #include <tommath_private.h> 1 #include "tommath_private.h"
2 #ifdef BN_MP_TOOM_MUL_C 2 #ifdef BN_MP_TOOM_MUL_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis
4 * 4 *
5 * LibTomMath is a library that provides multiple-precision 5 * LibTomMath is a library that provides multiple-precision
6 * integer arithmetic as well as number theoretic functionality. 6 * integer arithmetic as well as number theoretic functionality.
7 * 7 *
8 * The library was designed directly after the MPI library by 8 * The library was designed directly after the MPI library by
9 * Michael Fromberger but has been written from scratch with 9 * Michael Fromberger but has been written from scratch with
10 * additional optimizations in place. 10 * additional optimizations in place.
11 * 11 *
12 * The library is free for all purposes without any express 12 * SPDX-License-Identifier: Unlicense
13 * guarantee it works.
14 *
15 * Tom St Denis, [email protected], http://libtom.org
16 */ 13 */
17 14
18 /* multiplication using the Toom-Cook 3-way algorithm 15 /* multiplication using the Toom-Cook 3-way algorithm
19 * 16 *
20 * Much more complicated than Karatsuba but has a lower 17 * Much more complicated than Karatsuba but has a lower
21 * asymptotic running time of O(N**1.464). This algorithm is 18 * asymptotic running time of O(N**1.464). This algorithm is
22 * only particularly useful on VERY large inputs 19 * only particularly useful on VERY large inputs
23 * (we're talking 1000s of digits here...). 20 * (we're talking 1000s of digits here...).
24 */ 21 */
25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 22 int mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
26 { 23 {
27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 24 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
28 int res, B; 25 int res, B;
29 26
30 /* init temps */ 27 /* init temps */
31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 28 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
32 &a0, &a1, &a2, &b0, &b1, 29 &a0, &a1, &a2, &b0, &b1,
33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 30 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
34 return res; 31 return res;
35 } 32 }
36 33
37 /* B */ 34 /* B */
38 B = MIN(a->used, b->used) / 3; 35 B = MIN(a->used, b->used) / 3;
39 36
40 /* a = a2 * B**2 + a1 * B + a0 */ 37 /* a = a2 * B**2 + a1 * B + a0 */
41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 38 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
42 goto ERR; 39 goto LBL_ERR;
43 } 40 }
44 41
45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 42 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
46 goto ERR; 43 goto LBL_ERR;
47 } 44 }
48 mp_rshd(&a1, B); 45 mp_rshd(&a1, B);
49 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { 46 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) {
50 goto ERR; 47 goto LBL_ERR;
51 } 48 }
52 49
53 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 50 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
54 goto ERR; 51 goto LBL_ERR;
55 } 52 }
56 mp_rshd(&a2, B*2); 53 mp_rshd(&a2, B*2);
57 54
58 /* b = b2 * B**2 + b1 * B + b0 */ 55 /* b = b2 * B**2 + b1 * B + b0 */
59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 56 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
60 goto ERR; 57 goto LBL_ERR;
61 } 58 }
62 59
63 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 60 if ((res = mp_copy(b, &b1)) != MP_OKAY) {
64 goto ERR; 61 goto LBL_ERR;
65 } 62 }
66 mp_rshd(&b1, B); 63 mp_rshd(&b1, B);
67 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 64 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
68 65
69 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 66 if ((res = mp_copy(b, &b2)) != MP_OKAY) {
70 goto ERR; 67 goto LBL_ERR;
71 } 68 }
72 mp_rshd(&b2, B*2); 69 mp_rshd(&b2, B*2);
73 70
74 /* w0 = a0*b0 */ 71 /* w0 = a0*b0 */
75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 72 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
76 goto ERR; 73 goto LBL_ERR;
77 } 74 }
78 75
79 /* w4 = a2 * b2 */ 76 /* w4 = a2 * b2 */
80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 77 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
81 goto ERR; 78 goto LBL_ERR;
82 } 79 }
83 80
84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 81 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 82 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
86 goto ERR; 83 goto LBL_ERR;
87 } 84 }
88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 85 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
89 goto ERR; 86 goto LBL_ERR;
90 } 87 }
91 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 88 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
92 goto ERR; 89 goto LBL_ERR;
93 } 90 }
94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 91 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
95 goto ERR; 92 goto LBL_ERR;
96 } 93 }
97 94
98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 95 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
99 goto ERR; 96 goto LBL_ERR;
100 } 97 }
101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 98 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
102 goto ERR; 99 goto LBL_ERR;
103 } 100 }
104 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 101 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
105 goto ERR; 102 goto LBL_ERR;
106 } 103 }
107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 104 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
108 goto ERR; 105 goto LBL_ERR;
109 } 106 }
110 107
111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 108 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
112 goto ERR; 109 goto LBL_ERR;
113 } 110 }
114 111
115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 112 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 113 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
117 goto ERR; 114 goto LBL_ERR;
118 } 115 }
119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 116 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
120 goto ERR; 117 goto LBL_ERR;
121 } 118 }
122 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 119 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
123 goto ERR; 120 goto LBL_ERR;
124 } 121 }
125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 122 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
126 goto ERR; 123 goto LBL_ERR;
127 } 124 }
128 125
129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 126 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
130 goto ERR; 127 goto LBL_ERR;
131 } 128 }
132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 129 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
133 goto ERR; 130 goto LBL_ERR;
134 } 131 }
135 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 132 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
136 goto ERR; 133 goto LBL_ERR;
137 } 134 }
138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 135 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
139 goto ERR; 136 goto LBL_ERR;
140 } 137 }
141 138
142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 139 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
143 goto ERR; 140 goto LBL_ERR;
144 } 141 }
145 142
146 143
147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 144 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 145 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
149 goto ERR; 146 goto LBL_ERR;
150 } 147 }
151 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 148 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
152 goto ERR; 149 goto LBL_ERR;
153 } 150 }
154 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { 151 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
155 goto ERR; 152 goto LBL_ERR;
156 } 153 }
157 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 154 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
158 goto ERR; 155 goto LBL_ERR;
159 } 156 }
160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 157 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
161 goto ERR; 158 goto LBL_ERR;
162 } 159 }
163 160
164 /* now solve the matrix 161 /* now solve the matrix
165 162
166 0 0 0 0 1 163 0 0 0 0 1
167 1 2 4 8 16 164 1 2 4 8 16
168 1 1 1 1 1 165 1 1 1 1 1
169 16 8 4 2 1 166 16 8 4 2 1
170 1 0 0 0 0 167 1 0 0 0 0
171 168
172 using 12 subtractions, 4 shifts, 169 using 12 subtractions, 4 shifts,
173 2 small divisions and 1 small multiplication 170 2 small divisions and 1 small multiplication
174 */ 171 */
175 172
176 /* r1 - r4 */ 173 /* r1 - r4 */
177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 174 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
178 goto ERR; 175 goto LBL_ERR;
179 } 176 }
180 /* r3 - r0 */ 177 /* r3 - r0 */
181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 178 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
182 goto ERR; 179 goto LBL_ERR;
183 } 180 }
184 /* r1/2 */ 181 /* r1/2 */
185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 182 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
186 goto ERR; 183 goto LBL_ERR;
187 } 184 }
188 /* r3/2 */ 185 /* r3/2 */
189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 186 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
190 goto ERR; 187 goto LBL_ERR;
191 } 188 }
192 /* r2 - r0 - r4 */ 189 /* r2 - r0 - r4 */
193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 190 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
194 goto ERR; 191 goto LBL_ERR;
195 } 192 }
196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 193 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
197 goto ERR; 194 goto LBL_ERR;
198 } 195 }
199 /* r1 - r2 */ 196 /* r1 - r2 */
200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 197 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
201 goto ERR; 198 goto LBL_ERR;
202 } 199 }
203 /* r3 - r2 */ 200 /* r3 - r2 */
204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 201 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
205 goto ERR; 202 goto LBL_ERR;
206 } 203 }
207 /* r1 - 8r0 */ 204 /* r1 - 8r0 */
208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 205 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
209 goto ERR; 206 goto LBL_ERR;
210 } 207 }
211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 208 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
212 goto ERR; 209 goto LBL_ERR;
213 } 210 }
214 /* r3 - 8r4 */ 211 /* r3 - 8r4 */
215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 212 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
216 goto ERR; 213 goto LBL_ERR;
217 } 214 }
218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 215 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
219 goto ERR; 216 goto LBL_ERR;
220 } 217 }
221 /* 3r2 - r1 - r3 */ 218 /* 3r2 - r1 - r3 */
222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 219 if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) {
223 goto ERR; 220 goto LBL_ERR;
224 } 221 }
225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 222 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
226 goto ERR; 223 goto LBL_ERR;
227 } 224 }
228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 225 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
229 goto ERR; 226 goto LBL_ERR;
230 } 227 }
231 /* r1 - r2 */ 228 /* r1 - r2 */
232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 229 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
233 goto ERR; 230 goto LBL_ERR;
234 } 231 }
235 /* r3 - r2 */ 232 /* r3 - r2 */
236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 233 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
237 goto ERR; 234 goto LBL_ERR;
238 } 235 }
239 /* r1/3 */ 236 /* r1/3 */
240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 237 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
241 goto ERR; 238 goto LBL_ERR;
242 } 239 }
243 /* r3/3 */ 240 /* r3/3 */
244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 241 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
245 goto ERR; 242 goto LBL_ERR;
246 } 243 }
247 244
248 /* at this point shift W[n] by B*n */ 245 /* at this point shift W[n] by B*n */
249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 246 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
250 goto ERR; 247 goto LBL_ERR;
251 } 248 }
252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 249 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
253 goto ERR; 250 goto LBL_ERR;
254 } 251 }
255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 252 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
256 goto ERR; 253 goto LBL_ERR;
257 } 254 }
258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 255 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
259 goto ERR; 256 goto LBL_ERR;
260 } 257 }
261 258
262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 259 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
263 goto ERR; 260 goto LBL_ERR;
264 } 261 }
265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 262 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
266 goto ERR; 263 goto LBL_ERR;
267 } 264 }
268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 265 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
269 goto ERR; 266 goto LBL_ERR;
270 } 267 }
271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 268 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
272 goto ERR; 269 goto LBL_ERR;
273 } 270 }
274 271
275 ERR: 272 LBL_ERR:
276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 273 mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
277 &a0, &a1, &a2, &b0, &b1, 274 &a0, &a1, &a2, &b0, &b1,
278 &b2, &tmp1, &tmp2, NULL); 275 &b2, &tmp1, &tmp2, NULL);
279 return res; 276 return res;
280 } 277 }
281 278
282 #endif 279 #endif
283 280
284 /* ref: $Format:%D$ */ 281 /* ref: HEAD -> master, tag: v1.1.0 */
285 /* git commit: $Format:%H$ */ 282 /* git commit: 08549ad6bc8b0cede0b357a9c341c5c6473a9c55 */
286 /* commit time: $Format:%ai$ */ 283 /* commit time: 2019-01-28 20:32:32 +0100 */