Mercurial > dropbear
comparison libtommath/bn_mp_toom_mul.c @ 1655:f52919ffd3b1
update ltm to 1.1.0 and enable FIPS 186.4 compliant key-generation (#79)
* make key-generation compliant to FIPS 186.4
* fix includes in tommath_class.h
* update fuzzcorpus instead of error-out
* fixup fuzzing make-targets
* update Makefile.in
* apply necessary patches to ltm sources
* clean-up not required ltm files
* update to vanilla ltm 1.1.0
this already only contains the required files
* remove set/get double
author | Steffen Jaeckel <s_jaeckel@gmx.de> |
---|---|
date | Mon, 16 Sep 2019 15:50:38 +0200 |
parents | 8bba51a55704 |
children |
comparison
equal
deleted
inserted
replaced
1654:cc0fc5131c5c | 1655:f52919ffd3b1 |
---|---|
1 #include <tommath_private.h> | 1 #include "tommath_private.h" |
2 #ifdef BN_MP_TOOM_MUL_C | 2 #ifdef BN_MP_TOOM_MUL_C |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
4 * | 4 * |
5 * LibTomMath is a library that provides multiple-precision | 5 * LibTomMath is a library that provides multiple-precision |
6 * integer arithmetic as well as number theoretic functionality. | 6 * integer arithmetic as well as number theoretic functionality. |
7 * | 7 * |
8 * The library was designed directly after the MPI library by | 8 * The library was designed directly after the MPI library by |
9 * Michael Fromberger but has been written from scratch with | 9 * Michael Fromberger but has been written from scratch with |
10 * additional optimizations in place. | 10 * additional optimizations in place. |
11 * | 11 * |
12 * The library is free for all purposes without any express | 12 * SPDX-License-Identifier: Unlicense |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, [email protected], http://libtom.org | |
16 */ | 13 */ |
17 | 14 |
18 /* multiplication using the Toom-Cook 3-way algorithm | 15 /* multiplication using the Toom-Cook 3-way algorithm |
19 * | 16 * |
20 * Much more complicated than Karatsuba but has a lower | 17 * Much more complicated than Karatsuba but has a lower |
21 * asymptotic running time of O(N**1.464). This algorithm is | 18 * asymptotic running time of O(N**1.464). This algorithm is |
22 * only particularly useful on VERY large inputs | 19 * only particularly useful on VERY large inputs |
23 * (we're talking 1000s of digits here...). | 20 * (we're talking 1000s of digits here...). |
24 */ | 21 */ |
25 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) | 22 int mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c) |
26 { | 23 { |
27 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; | 24 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; |
28 int res, B; | 25 int res, B; |
29 | 26 |
30 /* init temps */ | 27 /* init temps */ |
31 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, | 28 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, |
32 &a0, &a1, &a2, &b0, &b1, | 29 &a0, &a1, &a2, &b0, &b1, |
33 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { | 30 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { |
34 return res; | 31 return res; |
35 } | 32 } |
36 | 33 |
37 /* B */ | 34 /* B */ |
38 B = MIN(a->used, b->used) / 3; | 35 B = MIN(a->used, b->used) / 3; |
39 | 36 |
40 /* a = a2 * B**2 + a1 * B + a0 */ | 37 /* a = a2 * B**2 + a1 * B + a0 */ |
41 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { | 38 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { |
42 goto ERR; | 39 goto LBL_ERR; |
43 } | 40 } |
44 | 41 |
45 if ((res = mp_copy(a, &a1)) != MP_OKAY) { | 42 if ((res = mp_copy(a, &a1)) != MP_OKAY) { |
46 goto ERR; | 43 goto LBL_ERR; |
47 } | 44 } |
48 mp_rshd(&a1, B); | 45 mp_rshd(&a1, B); |
49 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { | 46 if ((res = mp_mod_2d(&a1, DIGIT_BIT * B, &a1)) != MP_OKAY) { |
50 goto ERR; | 47 goto LBL_ERR; |
51 } | 48 } |
52 | 49 |
53 if ((res = mp_copy(a, &a2)) != MP_OKAY) { | 50 if ((res = mp_copy(a, &a2)) != MP_OKAY) { |
54 goto ERR; | 51 goto LBL_ERR; |
55 } | 52 } |
56 mp_rshd(&a2, B*2); | 53 mp_rshd(&a2, B*2); |
57 | 54 |
58 /* b = b2 * B**2 + b1 * B + b0 */ | 55 /* b = b2 * B**2 + b1 * B + b0 */ |
59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { | 56 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { |
60 goto ERR; | 57 goto LBL_ERR; |
61 } | 58 } |
62 | 59 |
63 if ((res = mp_copy(b, &b1)) != MP_OKAY) { | 60 if ((res = mp_copy(b, &b1)) != MP_OKAY) { |
64 goto ERR; | 61 goto LBL_ERR; |
65 } | 62 } |
66 mp_rshd(&b1, B); | 63 mp_rshd(&b1, B); |
67 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1); | 64 (void)mp_mod_2d(&b1, DIGIT_BIT * B, &b1); |
68 | 65 |
69 if ((res = mp_copy(b, &b2)) != MP_OKAY) { | 66 if ((res = mp_copy(b, &b2)) != MP_OKAY) { |
70 goto ERR; | 67 goto LBL_ERR; |
71 } | 68 } |
72 mp_rshd(&b2, B*2); | 69 mp_rshd(&b2, B*2); |
73 | 70 |
74 /* w0 = a0*b0 */ | 71 /* w0 = a0*b0 */ |
75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { | 72 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { |
76 goto ERR; | 73 goto LBL_ERR; |
77 } | 74 } |
78 | 75 |
79 /* w4 = a2 * b2 */ | 76 /* w4 = a2 * b2 */ |
80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { | 77 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { |
81 goto ERR; | 78 goto LBL_ERR; |
82 } | 79 } |
83 | 80 |
84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ | 81 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ |
85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { | 82 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { |
86 goto ERR; | 83 goto LBL_ERR; |
87 } | 84 } |
88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | 85 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { |
89 goto ERR; | 86 goto LBL_ERR; |
90 } | 87 } |
91 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { | 88 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { |
92 goto ERR; | 89 goto LBL_ERR; |
93 } | 90 } |
94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { | 91 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { |
95 goto ERR; | 92 goto LBL_ERR; |
96 } | 93 } |
97 | 94 |
98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { | 95 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { |
99 goto ERR; | 96 goto LBL_ERR; |
100 } | 97 } |
101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | 98 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { |
102 goto ERR; | 99 goto LBL_ERR; |
103 } | 100 } |
104 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { | 101 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { |
105 goto ERR; | 102 goto LBL_ERR; |
106 } | 103 } |
107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { | 104 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { |
108 goto ERR; | 105 goto LBL_ERR; |
109 } | 106 } |
110 | 107 |
111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { | 108 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { |
112 goto ERR; | 109 goto LBL_ERR; |
113 } | 110 } |
114 | 111 |
115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ | 112 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ |
116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { | 113 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { |
117 goto ERR; | 114 goto LBL_ERR; |
118 } | 115 } |
119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { | 116 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { |
120 goto ERR; | 117 goto LBL_ERR; |
121 } | 118 } |
122 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { | 119 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { |
123 goto ERR; | 120 goto LBL_ERR; |
124 } | 121 } |
125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { | 122 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { |
126 goto ERR; | 123 goto LBL_ERR; |
127 } | 124 } |
128 | 125 |
129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { | 126 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { |
130 goto ERR; | 127 goto LBL_ERR; |
131 } | 128 } |
132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { | 129 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { |
133 goto ERR; | 130 goto LBL_ERR; |
134 } | 131 } |
135 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { | 132 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { |
136 goto ERR; | 133 goto LBL_ERR; |
137 } | 134 } |
138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { | 135 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { |
139 goto ERR; | 136 goto LBL_ERR; |
140 } | 137 } |
141 | 138 |
142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { | 139 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { |
143 goto ERR; | 140 goto LBL_ERR; |
144 } | 141 } |
145 | 142 |
146 | 143 |
147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ | 144 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ |
148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { | 145 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { |
149 goto ERR; | 146 goto LBL_ERR; |
150 } | 147 } |
151 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { | 148 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { |
152 goto ERR; | 149 goto LBL_ERR; |
153 } | 150 } |
154 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { | 151 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { |
155 goto ERR; | 152 goto LBL_ERR; |
156 } | 153 } |
157 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { | 154 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { |
158 goto ERR; | 155 goto LBL_ERR; |
159 } | 156 } |
160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { | 157 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { |
161 goto ERR; | 158 goto LBL_ERR; |
162 } | 159 } |
163 | 160 |
164 /* now solve the matrix | 161 /* now solve the matrix |
165 | 162 |
166 0 0 0 0 1 | 163 0 0 0 0 1 |
167 1 2 4 8 16 | 164 1 2 4 8 16 |
168 1 1 1 1 1 | 165 1 1 1 1 1 |
169 16 8 4 2 1 | 166 16 8 4 2 1 |
170 1 0 0 0 0 | 167 1 0 0 0 0 |
171 | 168 |
172 using 12 subtractions, 4 shifts, | 169 using 12 subtractions, 4 shifts, |
173 2 small divisions and 1 small multiplication | 170 2 small divisions and 1 small multiplication |
174 */ | 171 */ |
175 | 172 |
176 /* r1 - r4 */ | 173 /* r1 - r4 */ |
177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { | 174 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { |
178 goto ERR; | 175 goto LBL_ERR; |
179 } | 176 } |
180 /* r3 - r0 */ | 177 /* r3 - r0 */ |
181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { | 178 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { |
182 goto ERR; | 179 goto LBL_ERR; |
183 } | 180 } |
184 /* r1/2 */ | 181 /* r1/2 */ |
185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { | 182 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { |
186 goto ERR; | 183 goto LBL_ERR; |
187 } | 184 } |
188 /* r3/2 */ | 185 /* r3/2 */ |
189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { | 186 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { |
190 goto ERR; | 187 goto LBL_ERR; |
191 } | 188 } |
192 /* r2 - r0 - r4 */ | 189 /* r2 - r0 - r4 */ |
193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { | 190 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { |
194 goto ERR; | 191 goto LBL_ERR; |
195 } | 192 } |
196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { | 193 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { |
197 goto ERR; | 194 goto LBL_ERR; |
198 } | 195 } |
199 /* r1 - r2 */ | 196 /* r1 - r2 */ |
200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | 197 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { |
201 goto ERR; | 198 goto LBL_ERR; |
202 } | 199 } |
203 /* r3 - r2 */ | 200 /* r3 - r2 */ |
204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | 201 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { |
205 goto ERR; | 202 goto LBL_ERR; |
206 } | 203 } |
207 /* r1 - 8r0 */ | 204 /* r1 - 8r0 */ |
208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { | 205 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { |
209 goto ERR; | 206 goto LBL_ERR; |
210 } | 207 } |
211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { | 208 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { |
212 goto ERR; | 209 goto LBL_ERR; |
213 } | 210 } |
214 /* r3 - 8r4 */ | 211 /* r3 - 8r4 */ |
215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { | 212 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { |
216 goto ERR; | 213 goto LBL_ERR; |
217 } | 214 } |
218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { | 215 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { |
219 goto ERR; | 216 goto LBL_ERR; |
220 } | 217 } |
221 /* 3r2 - r1 - r3 */ | 218 /* 3r2 - r1 - r3 */ |
222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { | 219 if ((res = mp_mul_d(&w2, 3uL, &w2)) != MP_OKAY) { |
223 goto ERR; | 220 goto LBL_ERR; |
224 } | 221 } |
225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { | 222 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { |
226 goto ERR; | 223 goto LBL_ERR; |
227 } | 224 } |
228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { | 225 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { |
229 goto ERR; | 226 goto LBL_ERR; |
230 } | 227 } |
231 /* r1 - r2 */ | 228 /* r1 - r2 */ |
232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { | 229 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { |
233 goto ERR; | 230 goto LBL_ERR; |
234 } | 231 } |
235 /* r3 - r2 */ | 232 /* r3 - r2 */ |
236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { | 233 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { |
237 goto ERR; | 234 goto LBL_ERR; |
238 } | 235 } |
239 /* r1/3 */ | 236 /* r1/3 */ |
240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { | 237 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { |
241 goto ERR; | 238 goto LBL_ERR; |
242 } | 239 } |
243 /* r3/3 */ | 240 /* r3/3 */ |
244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { | 241 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { |
245 goto ERR; | 242 goto LBL_ERR; |
246 } | 243 } |
247 | 244 |
248 /* at this point shift W[n] by B*n */ | 245 /* at this point shift W[n] by B*n */ |
249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { | 246 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { |
250 goto ERR; | 247 goto LBL_ERR; |
251 } | 248 } |
252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { | 249 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { |
253 goto ERR; | 250 goto LBL_ERR; |
254 } | 251 } |
255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { | 252 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { |
256 goto ERR; | 253 goto LBL_ERR; |
257 } | 254 } |
258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { | 255 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { |
259 goto ERR; | 256 goto LBL_ERR; |
260 } | 257 } |
261 | 258 |
262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { | 259 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { |
263 goto ERR; | 260 goto LBL_ERR; |
264 } | 261 } |
265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { | 262 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { |
266 goto ERR; | 263 goto LBL_ERR; |
267 } | 264 } |
268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { | 265 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { |
269 goto ERR; | 266 goto LBL_ERR; |
270 } | 267 } |
271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { | 268 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { |
272 goto ERR; | 269 goto LBL_ERR; |
273 } | 270 } |
274 | 271 |
275 ERR: | 272 LBL_ERR: |
276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, | 273 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, |
277 &a0, &a1, &a2, &b0, &b1, | 274 &a0, &a1, &a2, &b0, &b1, |
278 &b2, &tmp1, &tmp2, NULL); | 275 &b2, &tmp1, &tmp2, NULL); |
279 return res; | 276 return res; |
280 } | 277 } |
281 | 278 |
282 #endif | 279 #endif |
283 | 280 |
284 /* ref: $Format:%D$ */ | 281 /* ref: HEAD -> master, tag: v1.1.0 */ |
285 /* git commit: $Format:%H$ */ | 282 /* git commit: 08549ad6bc8b0cede0b357a9c341c5c6473a9c55 */ |
286 /* commit time: $Format:%ai$ */ | 283 /* commit time: 2019-01-28 20:32:32 +0100 */ |