comparison libtommath/bn_s_mp_karatsuba_mul.c @ 1692:1051e4eea25a

Update LibTomMath to 1.2.0 (#84) * update C files * update other files * update headers * update makefiles * remove mp_set/get_double() * use ltm 1.2.0 API * update ltm_desc * use bundled tommath if system-tommath is too old * XMALLOC etc. were changed to MP_MALLOC etc.
author Steffen Jaeckel <s@jaeckel.eu>
date Tue, 26 May 2020 17:36:47 +0200
parents
children
comparison
equal deleted inserted replaced
1691:2d3745d58843 1692:1051e4eea25a
1 #include "tommath_private.h"
2 #ifdef BN_S_MP_KARATSUBA_MUL_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 /* SPDX-License-Identifier: Unlicense */
5
6 /* c = |a| * |b| using Karatsuba Multiplication using
7 * three half size multiplications
8 *
9 * Let B represent the radix [e.g. 2**MP_DIGIT_BIT] and
10 * let n represent half of the number of digits in
11 * the min(a,b)
12 *
13 * a = a1 * B**n + a0
14 * b = b1 * B**n + b0
15 *
16 * Then, a * b =>
17 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
18 *
19 * Note that a1b1 and a0b0 are used twice and only need to be
20 * computed once. So in total three half size (half # of
21 * digit) multiplications are performed, a0b0, a1b1 and
22 * (a1+b1)(a0+b0)
23 *
24 * Note that a multiplication of half the digits requires
25 * 1/4th the number of single precision multiplications so in
26 * total after one call 25% of the single precision multiplications
27 * are saved. Note also that the call to mp_mul can end up back
28 * in this function if the a0, a1, b0, or b1 are above the threshold.
29 * This is known as divide-and-conquer and leads to the famous
30 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
31 * the standard O(N**2) that the baseline/comba methods use.
32 * Generally though the overhead of this method doesn't pay off
33 * until a certain size (N ~ 80) is reached.
34 */
35 mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
36 {
37 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
38 int B;
39 mp_err err = MP_MEM; /* default the return code to an error */
40
41 /* min # of digits */
42 B = MP_MIN(a->used, b->used);
43
44 /* now divide in two */
45 B = B >> 1;
46
47 /* init copy all the temps */
48 if (mp_init_size(&x0, B) != MP_OKAY) {
49 goto LBL_ERR;
50 }
51 if (mp_init_size(&x1, a->used - B) != MP_OKAY) {
52 goto X0;
53 }
54 if (mp_init_size(&y0, B) != MP_OKAY) {
55 goto X1;
56 }
57 if (mp_init_size(&y1, b->used - B) != MP_OKAY) {
58 goto Y0;
59 }
60
61 /* init temps */
62 if (mp_init_size(&t1, B * 2) != MP_OKAY) {
63 goto Y1;
64 }
65 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) {
66 goto T1;
67 }
68 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) {
69 goto X0Y0;
70 }
71
72 /* now shift the digits */
73 x0.used = y0.used = B;
74 x1.used = a->used - B;
75 y1.used = b->used - B;
76
77 {
78 int x;
79 mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
80
81 /* we copy the digits directly instead of using higher level functions
82 * since we also need to shift the digits
83 */
84 tmpa = a->dp;
85 tmpb = b->dp;
86
87 tmpx = x0.dp;
88 tmpy = y0.dp;
89 for (x = 0; x < B; x++) {
90 *tmpx++ = *tmpa++;
91 *tmpy++ = *tmpb++;
92 }
93
94 tmpx = x1.dp;
95 for (x = B; x < a->used; x++) {
96 *tmpx++ = *tmpa++;
97 }
98
99 tmpy = y1.dp;
100 for (x = B; x < b->used; x++) {
101 *tmpy++ = *tmpb++;
102 }
103 }
104
105 /* only need to clamp the lower words since by definition the
106 * upper words x1/y1 must have a known number of digits
107 */
108 mp_clamp(&x0);
109 mp_clamp(&y0);
110
111 /* now calc the products x0y0 and x1y1 */
112 /* after this x0 is no longer required, free temp [x0==t2]! */
113 if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) {
114 goto X1Y1; /* x0y0 = x0*y0 */
115 }
116 if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) {
117 goto X1Y1; /* x1y1 = x1*y1 */
118 }
119
120 /* now calc x1+x0 and y1+y0 */
121 if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) {
122 goto X1Y1; /* t1 = x1 - x0 */
123 }
124 if (s_mp_add(&y1, &y0, &x0) != MP_OKAY) {
125 goto X1Y1; /* t2 = y1 - y0 */
126 }
127 if (mp_mul(&t1, &x0, &t1) != MP_OKAY) {
128 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */
129 }
130
131 /* add x0y0 */
132 if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY) {
133 goto X1Y1; /* t2 = x0y0 + x1y1 */
134 }
135 if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY) {
136 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
137 }
138
139 /* shift by B */
140 if (mp_lshd(&t1, B) != MP_OKAY) {
141 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
142 }
143 if (mp_lshd(&x1y1, B * 2) != MP_OKAY) {
144 goto X1Y1; /* x1y1 = x1y1 << 2*B */
145 }
146
147 if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) {
148 goto X1Y1; /* t1 = x0y0 + t1 */
149 }
150 if (mp_add(&t1, &x1y1, c) != MP_OKAY) {
151 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
152 }
153
154 /* Algorithm succeeded set the return code to MP_OKAY */
155 err = MP_OKAY;
156
157 X1Y1:
158 mp_clear(&x1y1);
159 X0Y0:
160 mp_clear(&x0y0);
161 T1:
162 mp_clear(&t1);
163 Y1:
164 mp_clear(&y1);
165 Y0:
166 mp_clear(&y0);
167 X1:
168 mp_clear(&x1);
169 X0:
170 mp_clear(&x0);
171 LBL_ERR:
172 return err;
173 }
174 #endif