Mercurial > dropbear
comparison libtommath/bn_mp_karatsuba_mul.c @ 1655:f52919ffd3b1
update ltm to 1.1.0 and enable FIPS 186.4 compliant key-generation (#79)
* make key-generation compliant to FIPS 186.4
* fix includes in tommath_class.h
* update fuzzcorpus instead of error-out
* fixup fuzzing make-targets
* update Makefile.in
* apply necessary patches to ltm sources
* clean-up not required ltm files
* update to vanilla ltm 1.1.0
this already only contains the required files
* remove set/get double
author | Steffen Jaeckel <s_jaeckel@gmx.de> |
---|---|
date | Mon, 16 Sep 2019 15:50:38 +0200 |
parents | 8bba51a55704 |
children |
comparison
equal
deleted
inserted
replaced
1654:cc0fc5131c5c | 1655:f52919ffd3b1 |
---|---|
1 #include <tommath_private.h> | 1 #include "tommath_private.h" |
2 #ifdef BN_MP_KARATSUBA_MUL_C | 2 #ifdef BN_MP_KARATSUBA_MUL_C |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
4 * | 4 * |
5 * LibTomMath is a library that provides multiple-precision | 5 * LibTomMath is a library that provides multiple-precision |
6 * integer arithmetic as well as number theoretic functionality. | 6 * integer arithmetic as well as number theoretic functionality. |
7 * | 7 * |
8 * The library was designed directly after the MPI library by | 8 * The library was designed directly after the MPI library by |
9 * Michael Fromberger but has been written from scratch with | 9 * Michael Fromberger but has been written from scratch with |
10 * additional optimizations in place. | 10 * additional optimizations in place. |
11 * | 11 * |
12 * The library is free for all purposes without any express | 12 * SPDX-License-Identifier: Unlicense |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, [email protected], http://libtom.org | |
16 */ | 13 */ |
17 | 14 |
18 /* c = |a| * |b| using Karatsuba Multiplication using | 15 /* c = |a| * |b| using Karatsuba Multiplication using |
19 * three half size multiplications | 16 * three half size multiplications |
20 * | 17 * |
21 * Let B represent the radix [e.g. 2**DIGIT_BIT] and | 18 * Let B represent the radix [e.g. 2**DIGIT_BIT] and |
22 * let n represent half of the number of digits in | 19 * let n represent half of the number of digits in |
23 * the min(a,b) | 20 * the min(a,b) |
24 * | 21 * |
25 * a = a1 * B**n + a0 | 22 * a = a1 * B**n + a0 |
26 * b = b1 * B**n + b0 | 23 * b = b1 * B**n + b0 |
27 * | 24 * |
28 * Then, a * b => | 25 * Then, a * b => |
29 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 | 26 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 |
30 * | 27 * |
31 * Note that a1b1 and a0b0 are used twice and only need to be | 28 * Note that a1b1 and a0b0 are used twice and only need to be |
32 * computed once. So in total three half size (half # of | 29 * computed once. So in total three half size (half # of |
33 * digit) multiplications are performed, a0b0, a1b1 and | 30 * digit) multiplications are performed, a0b0, a1b1 and |
34 * (a1+b1)(a0+b0) | 31 * (a1+b1)(a0+b0) |
35 * | 32 * |
36 * Note that a multiplication of half the digits requires | 33 * Note that a multiplication of half the digits requires |
37 * 1/4th the number of single precision multiplications so in | 34 * 1/4th the number of single precision multiplications so in |
38 * total after one call 25% of the single precision multiplications | 35 * total after one call 25% of the single precision multiplications |
39 * are saved. Note also that the call to mp_mul can end up back | 36 * are saved. Note also that the call to mp_mul can end up back |
40 * in this function if the a0, a1, b0, or b1 are above the threshold. | 37 * in this function if the a0, a1, b0, or b1 are above the threshold. |
41 * This is known as divide-and-conquer and leads to the famous | 38 * This is known as divide-and-conquer and leads to the famous |
42 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than | 39 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than |
43 * the standard O(N**2) that the baseline/comba methods use. | 40 * the standard O(N**2) that the baseline/comba methods use. |
44 * Generally though the overhead of this method doesn't pay off | 41 * Generally though the overhead of this method doesn't pay off |
45 * until a certain size (N ~ 80) is reached. | 42 * until a certain size (N ~ 80) is reached. |
46 */ | 43 */ |
47 int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) | 44 int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) |
48 { | 45 { |
49 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; | 46 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; |
50 int B, err; | 47 int B, err; |
51 | 48 |
52 /* default the return code to an error */ | 49 /* default the return code to an error */ |
53 err = MP_MEM; | 50 err = MP_MEM; |
54 | 51 |
55 /* min # of digits */ | 52 /* min # of digits */ |
56 B = MIN (a->used, b->used); | 53 B = MIN(a->used, b->used); |
57 | 54 |
58 /* now divide in two */ | 55 /* now divide in two */ |
59 B = B >> 1; | 56 B = B >> 1; |
60 | 57 |
61 /* init copy all the temps */ | 58 /* init copy all the temps */ |
62 if (mp_init_size (&x0, B) != MP_OKAY) | 59 if (mp_init_size(&x0, B) != MP_OKAY) |
63 goto ERR; | 60 goto LBL_ERR; |
64 if (mp_init_size (&x1, a->used - B) != MP_OKAY) | 61 if (mp_init_size(&x1, a->used - B) != MP_OKAY) |
65 goto X0; | 62 goto X0; |
66 if (mp_init_size (&y0, B) != MP_OKAY) | 63 if (mp_init_size(&y0, B) != MP_OKAY) |
67 goto X1; | 64 goto X1; |
68 if (mp_init_size (&y1, b->used - B) != MP_OKAY) | 65 if (mp_init_size(&y1, b->used - B) != MP_OKAY) |
69 goto Y0; | 66 goto Y0; |
70 | 67 |
71 /* init temps */ | 68 /* init temps */ |
72 if (mp_init_size (&t1, B * 2) != MP_OKAY) | 69 if (mp_init_size(&t1, B * 2) != MP_OKAY) |
73 goto Y1; | 70 goto Y1; |
74 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) | 71 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) |
75 goto T1; | 72 goto T1; |
76 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) | 73 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) |
77 goto X0Y0; | 74 goto X0Y0; |
78 | 75 |
79 /* now shift the digits */ | 76 /* now shift the digits */ |
80 x0.used = y0.used = B; | 77 x0.used = y0.used = B; |
81 x1.used = a->used - B; | 78 x1.used = a->used - B; |
82 y1.used = b->used - B; | 79 y1.used = b->used - B; |
83 | 80 |
84 { | 81 { |
85 int x; | 82 int x; |
86 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; | 83 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; |
87 | 84 |
88 /* we copy the digits directly instead of using higher level functions | 85 /* we copy the digits directly instead of using higher level functions |
89 * since we also need to shift the digits | 86 * since we also need to shift the digits |
90 */ | 87 */ |
91 tmpa = a->dp; | 88 tmpa = a->dp; |
92 tmpb = b->dp; | 89 tmpb = b->dp; |
93 | 90 |
94 tmpx = x0.dp; | 91 tmpx = x0.dp; |
95 tmpy = y0.dp; | 92 tmpy = y0.dp; |
96 for (x = 0; x < B; x++) { | 93 for (x = 0; x < B; x++) { |
97 *tmpx++ = *tmpa++; | 94 *tmpx++ = *tmpa++; |
98 *tmpy++ = *tmpb++; | 95 *tmpy++ = *tmpb++; |
99 } | 96 } |
100 | 97 |
101 tmpx = x1.dp; | 98 tmpx = x1.dp; |
102 for (x = B; x < a->used; x++) { | 99 for (x = B; x < a->used; x++) { |
103 *tmpx++ = *tmpa++; | 100 *tmpx++ = *tmpa++; |
104 } | 101 } |
105 | 102 |
106 tmpy = y1.dp; | 103 tmpy = y1.dp; |
107 for (x = B; x < b->used; x++) { | 104 for (x = B; x < b->used; x++) { |
108 *tmpy++ = *tmpb++; | 105 *tmpy++ = *tmpb++; |
109 } | 106 } |
110 } | 107 } |
111 | 108 |
112 /* only need to clamp the lower words since by definition the | 109 /* only need to clamp the lower words since by definition the |
113 * upper words x1/y1 must have a known number of digits | 110 * upper words x1/y1 must have a known number of digits |
114 */ | 111 */ |
115 mp_clamp (&x0); | 112 mp_clamp(&x0); |
116 mp_clamp (&y0); | 113 mp_clamp(&y0); |
117 | 114 |
118 /* now calc the products x0y0 and x1y1 */ | 115 /* now calc the products x0y0 and x1y1 */ |
119 /* after this x0 is no longer required, free temp [x0==t2]! */ | 116 /* after this x0 is no longer required, free temp [x0==t2]! */ |
120 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) | 117 if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) |
121 goto X1Y1; /* x0y0 = x0*y0 */ | 118 goto X1Y1; /* x0y0 = x0*y0 */ |
122 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) | 119 if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) |
123 goto X1Y1; /* x1y1 = x1*y1 */ | 120 goto X1Y1; /* x1y1 = x1*y1 */ |
124 | 121 |
125 /* now calc x1+x0 and y1+y0 */ | 122 /* now calc x1+x0 and y1+y0 */ |
126 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) | 123 if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) |
127 goto X1Y1; /* t1 = x1 - x0 */ | 124 goto X1Y1; /* t1 = x1 - x0 */ |
128 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) | 125 if (s_mp_add(&y1, &y0, &x0) != MP_OKAY) |
129 goto X1Y1; /* t2 = y1 - y0 */ | 126 goto X1Y1; /* t2 = y1 - y0 */ |
130 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) | 127 if (mp_mul(&t1, &x0, &t1) != MP_OKAY) |
131 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ | 128 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ |
132 | 129 |
133 /* add x0y0 */ | 130 /* add x0y0 */ |
134 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) | 131 if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY) |
135 goto X1Y1; /* t2 = x0y0 + x1y1 */ | 132 goto X1Y1; /* t2 = x0y0 + x1y1 */ |
136 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) | 133 if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY) |
137 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ | 134 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ |
138 | 135 |
139 /* shift by B */ | 136 /* shift by B */ |
140 if (mp_lshd (&t1, B) != MP_OKAY) | 137 if (mp_lshd(&t1, B) != MP_OKAY) |
141 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ | 138 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ |
142 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) | 139 if (mp_lshd(&x1y1, B * 2) != MP_OKAY) |
143 goto X1Y1; /* x1y1 = x1y1 << 2*B */ | 140 goto X1Y1; /* x1y1 = x1y1 << 2*B */ |
144 | 141 |
145 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) | 142 if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) |
146 goto X1Y1; /* t1 = x0y0 + t1 */ | 143 goto X1Y1; /* t1 = x0y0 + t1 */ |
147 if (mp_add (&t1, &x1y1, c) != MP_OKAY) | 144 if (mp_add(&t1, &x1y1, c) != MP_OKAY) |
148 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ | 145 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ |
149 | 146 |
150 /* Algorithm succeeded set the return code to MP_OKAY */ | 147 /* Algorithm succeeded set the return code to MP_OKAY */ |
151 err = MP_OKAY; | 148 err = MP_OKAY; |
152 | 149 |
153 X1Y1:mp_clear (&x1y1); | 150 X1Y1: |
154 X0Y0:mp_clear (&x0y0); | 151 mp_clear(&x1y1); |
155 T1:mp_clear (&t1); | 152 X0Y0: |
156 Y1:mp_clear (&y1); | 153 mp_clear(&x0y0); |
157 Y0:mp_clear (&y0); | 154 T1: |
158 X1:mp_clear (&x1); | 155 mp_clear(&t1); |
159 X0:mp_clear (&x0); | 156 Y1: |
160 ERR: | 157 mp_clear(&y1); |
161 return err; | 158 Y0: |
159 mp_clear(&y0); | |
160 X1: | |
161 mp_clear(&x1); | |
162 X0: | |
163 mp_clear(&x0); | |
164 LBL_ERR: | |
165 return err; | |
162 } | 166 } |
163 #endif | 167 #endif |
164 | 168 |
165 /* ref: $Format:%D$ */ | 169 /* ref: HEAD -> master, tag: v1.1.0 */ |
166 /* git commit: $Format:%H$ */ | 170 /* git commit: 08549ad6bc8b0cede0b357a9c341c5c6473a9c55 */ |
167 /* commit time: $Format:%ai$ */ | 171 /* commit time: 2019-01-28 20:32:32 +0100 */ |