comparison libtommath/bn_s_mp_toom_mul.c @ 1692:1051e4eea25a

Update LibTomMath to 1.2.0 (#84) * update C files * update other files * update headers * update makefiles * remove mp_set/get_double() * use ltm 1.2.0 API * update ltm_desc * use bundled tommath if system-tommath is too old * XMALLOC etc. were changed to MP_MALLOC etc.
author Steffen Jaeckel <s@jaeckel.eu>
date Tue, 26 May 2020 17:36:47 +0200
parents
children
comparison
equal deleted inserted replaced
1691:2d3745d58843 1692:1051e4eea25a
1 #include "tommath_private.h"
2 #ifdef BN_S_MP_TOOM_MUL_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 /* SPDX-License-Identifier: Unlicense */
5
6 /* multiplication using the Toom-Cook 3-way algorithm
7 *
8 * Much more complicated than Karatsuba but has a lower
9 * asymptotic running time of O(N**1.464). This algorithm is
10 * only particularly useful on VERY large inputs
11 * (we're talking 1000s of digits here...).
12 */
13
14 /*
15 This file contains code from J. Arndt's book "Matters Computational"
16 and the accompanying FXT-library with permission of the author.
17 */
18
19 /*
20 Setup from
21
22 Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
23 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
24
25 The interpolation from above needed one temporary variable more
26 than the interpolation here:
27
28 Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality."
29 Centro Vito Volterra Universita di Roma Tor Vergata (2006)
30 */
31
32 mp_err s_mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
33 {
34 mp_int S1, S2, T1, a0, a1, a2, b0, b1, b2;
35 int B, count;
36 mp_err err;
37
38 /* init temps */
39 if ((err = mp_init_multi(&S1, &S2, &T1, NULL)) != MP_OKAY) {
40 return err;
41 }
42
43 /* B */
44 B = MP_MIN(a->used, b->used) / 3;
45
46 /** a = a2 * x^2 + a1 * x + a0; */
47 if ((err = mp_init_size(&a0, B)) != MP_OKAY) goto LBL_ERRa0;
48
49 for (count = 0; count < B; count++) {
50 a0.dp[count] = a->dp[count];
51 a0.used++;
52 }
53 mp_clamp(&a0);
54 if ((err = mp_init_size(&a1, B)) != MP_OKAY) goto LBL_ERRa1;
55 for (; count < (2 * B); count++) {
56 a1.dp[count - B] = a->dp[count];
57 a1.used++;
58 }
59 mp_clamp(&a1);
60 if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2;
61 for (; count < a->used; count++) {
62 a2.dp[count - (2 * B)] = a->dp[count];
63 a2.used++;
64 }
65 mp_clamp(&a2);
66
67 /** b = b2 * x^2 + b1 * x + b0; */
68 if ((err = mp_init_size(&b0, B)) != MP_OKAY) goto LBL_ERRb0;
69 for (count = 0; count < B; count++) {
70 b0.dp[count] = b->dp[count];
71 b0.used++;
72 }
73 mp_clamp(&b0);
74 if ((err = mp_init_size(&b1, B)) != MP_OKAY) goto LBL_ERRb1;
75 for (; count < (2 * B); count++) {
76 b1.dp[count - B] = b->dp[count];
77 b1.used++;
78 }
79 mp_clamp(&b1);
80 if ((err = mp_init_size(&b2, B + (b->used - (3 * B)))) != MP_OKAY) goto LBL_ERRb2;
81 for (; count < b->used; count++) {
82 b2.dp[count - (2 * B)] = b->dp[count];
83 b2.used++;
84 }
85 mp_clamp(&b2);
86
87 /** \\ S1 = (a2+a1+a0) * (b2+b1+b0); */
88 /** T1 = a2 + a1; */
89 if ((err = mp_add(&a2, &a1, &T1)) != MP_OKAY) goto LBL_ERR;
90
91 /** S2 = T1 + a0; */
92 if ((err = mp_add(&T1, &a0, &S2)) != MP_OKAY) goto LBL_ERR;
93
94 /** c = b2 + b1; */
95 if ((err = mp_add(&b2, &b1, c)) != MP_OKAY) goto LBL_ERR;
96
97 /** S1 = c + b0; */
98 if ((err = mp_add(c, &b0, &S1)) != MP_OKAY) goto LBL_ERR;
99
100 /** S1 = S1 * S2; */
101 if ((err = mp_mul(&S1, &S2, &S1)) != MP_OKAY) goto LBL_ERR;
102
103 /** \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); */
104 /** T1 = T1 + a2; */
105 if ((err = mp_add(&T1, &a2, &T1)) != MP_OKAY) goto LBL_ERR;
106
107 /** T1 = T1 << 1; */
108 if ((err = mp_mul_2(&T1, &T1)) != MP_OKAY) goto LBL_ERR;
109
110 /** T1 = T1 + a0; */
111 if ((err = mp_add(&T1, &a0, &T1)) != MP_OKAY) goto LBL_ERR;
112
113 /** c = c + b2; */
114 if ((err = mp_add(c, &b2, c)) != MP_OKAY) goto LBL_ERR;
115
116 /** c = c << 1; */
117 if ((err = mp_mul_2(c, c)) != MP_OKAY) goto LBL_ERR;
118
119 /** c = c + b0; */
120 if ((err = mp_add(c, &b0, c)) != MP_OKAY) goto LBL_ERR;
121
122 /** S2 = T1 * c; */
123 if ((err = mp_mul(&T1, c, &S2)) != MP_OKAY) goto LBL_ERR;
124
125 /** \\S3 = (a2-a1+a0) * (b2-b1+b0); */
126 /** a1 = a2 - a1; */
127 if ((err = mp_sub(&a2, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
128
129 /** a1 = a1 + a0; */
130 if ((err = mp_add(&a1, &a0, &a1)) != MP_OKAY) goto LBL_ERR;
131
132 /** b1 = b2 - b1; */
133 if ((err = mp_sub(&b2, &b1, &b1)) != MP_OKAY) goto LBL_ERR;
134
135 /** b1 = b1 + b0; */
136 if ((err = mp_add(&b1, &b0, &b1)) != MP_OKAY) goto LBL_ERR;
137
138 /** a1 = a1 * b1; */
139 if ((err = mp_mul(&a1, &b1, &a1)) != MP_OKAY) goto LBL_ERR;
140
141 /** b1 = a2 * b2; */
142 if ((err = mp_mul(&a2, &b2, &b1)) != MP_OKAY) goto LBL_ERR;
143
144 /** \\S2 = (S2 - S3)/3; */
145 /** S2 = S2 - a1; */
146 if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY) goto LBL_ERR;
147
148 /** S2 = S2 / 3; \\ this is an exact division */
149 if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY) goto LBL_ERR;
150
151 /** a1 = S1 - a1; */
152 if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
153
154 /** a1 = a1 >> 1; */
155 if ((err = mp_div_2(&a1, &a1)) != MP_OKAY) goto LBL_ERR;
156
157 /** a0 = a0 * b0; */
158 if ((err = mp_mul(&a0, &b0, &a0)) != MP_OKAY) goto LBL_ERR;
159
160 /** S1 = S1 - a0; */
161 if ((err = mp_sub(&S1, &a0, &S1)) != MP_OKAY) goto LBL_ERR;
162
163 /** S2 = S2 - S1; */
164 if ((err = mp_sub(&S2, &S1, &S2)) != MP_OKAY) goto LBL_ERR;
165
166 /** S2 = S2 >> 1; */
167 if ((err = mp_div_2(&S2, &S2)) != MP_OKAY) goto LBL_ERR;
168
169 /** S1 = S1 - a1; */
170 if ((err = mp_sub(&S1, &a1, &S1)) != MP_OKAY) goto LBL_ERR;
171
172 /** S1 = S1 - b1; */
173 if ((err = mp_sub(&S1, &b1, &S1)) != MP_OKAY) goto LBL_ERR;
174
175 /** T1 = b1 << 1; */
176 if ((err = mp_mul_2(&b1, &T1)) != MP_OKAY) goto LBL_ERR;
177
178 /** S2 = S2 - T1; */
179 if ((err = mp_sub(&S2, &T1, &S2)) != MP_OKAY) goto LBL_ERR;
180
181 /** a1 = a1 - S2; */
182 if ((err = mp_sub(&a1, &S2, &a1)) != MP_OKAY) goto LBL_ERR;
183
184
185 /** P = b1*x^4+ S2*x^3+ S1*x^2+ a1*x + a0; */
186 if ((err = mp_lshd(&b1, 4 * B)) != MP_OKAY) goto LBL_ERR;
187 if ((err = mp_lshd(&S2, 3 * B)) != MP_OKAY) goto LBL_ERR;
188 if ((err = mp_add(&b1, &S2, &b1)) != MP_OKAY) goto LBL_ERR;
189 if ((err = mp_lshd(&S1, 2 * B)) != MP_OKAY) goto LBL_ERR;
190 if ((err = mp_add(&b1, &S1, &b1)) != MP_OKAY) goto LBL_ERR;
191 if ((err = mp_lshd(&a1, 1 * B)) != MP_OKAY) goto LBL_ERR;
192 if ((err = mp_add(&b1, &a1, &b1)) != MP_OKAY) goto LBL_ERR;
193 if ((err = mp_add(&b1, &a0, c)) != MP_OKAY) goto LBL_ERR;
194
195 /** a * b - P */
196
197
198 LBL_ERR:
199 mp_clear(&b2);
200 LBL_ERRb2:
201 mp_clear(&b1);
202 LBL_ERRb1:
203 mp_clear(&b0);
204 LBL_ERRb0:
205 mp_clear(&a2);
206 LBL_ERRa2:
207 mp_clear(&a1);
208 LBL_ERRa1:
209 mp_clear(&a0);
210 LBL_ERRa0:
211 mp_clear_multi(&S1, &S2, &T1, NULL);
212 return err;
213 }
214
215 #endif