142
|
1 #include <tommath.h> |
|
2 #ifdef BN_MP_EXPTMOD_FAST_C |
2
|
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
|
4 * |
|
5 * LibTomMath is a library that provides multiple-precision |
|
6 * integer arithmetic as well as number theoretic functionality. |
|
7 * |
|
8 * The library was designed directly after the MPI library by |
|
9 * Michael Fromberger but has been written from scratch with |
|
10 * additional optimizations in place. |
|
11 * |
|
12 * The library is free for all purposes without any express |
|
13 * guarantee it works. |
|
14 * |
|
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org |
|
16 */ |
|
17 |
|
18 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 |
|
19 * |
|
20 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. |
|
21 * The value of k changes based on the size of the exponent. |
|
22 * |
|
23 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] |
|
24 */ |
|
25 |
|
26 #ifdef MP_LOW_MEM |
|
27 #define TAB_SIZE 32 |
|
28 #else |
|
29 #define TAB_SIZE 256 |
|
30 #endif |
|
31 |
|
32 int |
|
33 mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) |
|
34 { |
|
35 mp_int M[TAB_SIZE], res; |
|
36 mp_digit buf, mp; |
|
37 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; |
|
38 |
|
39 /* use a pointer to the reduction algorithm. This allows us to use |
|
40 * one of many reduction algorithms without modding the guts of |
|
41 * the code with if statements everywhere. |
|
42 */ |
|
43 int (*redux)(mp_int*,mp_int*,mp_digit); |
|
44 |
|
45 /* find window size */ |
|
46 x = mp_count_bits (X); |
|
47 if (x <= 7) { |
|
48 winsize = 2; |
|
49 } else if (x <= 36) { |
|
50 winsize = 3; |
|
51 } else if (x <= 140) { |
|
52 winsize = 4; |
|
53 } else if (x <= 450) { |
|
54 winsize = 5; |
|
55 } else if (x <= 1303) { |
|
56 winsize = 6; |
|
57 } else if (x <= 3529) { |
|
58 winsize = 7; |
|
59 } else { |
|
60 winsize = 8; |
|
61 } |
|
62 |
|
63 #ifdef MP_LOW_MEM |
|
64 if (winsize > 5) { |
|
65 winsize = 5; |
|
66 } |
|
67 #endif |
|
68 |
|
69 /* init M array */ |
|
70 /* init first cell */ |
|
71 if ((err = mp_init(&M[1])) != MP_OKAY) { |
|
72 return err; |
|
73 } |
|
74 |
|
75 /* now init the second half of the array */ |
|
76 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { |
|
77 if ((err = mp_init(&M[x])) != MP_OKAY) { |
|
78 for (y = 1<<(winsize-1); y < x; y++) { |
|
79 mp_clear (&M[y]); |
|
80 } |
|
81 mp_clear(&M[1]); |
|
82 return err; |
|
83 } |
|
84 } |
|
85 |
|
86 /* determine and setup reduction code */ |
|
87 if (redmode == 0) { |
142
|
88 #ifdef BN_MP_MONTGOMERY_SETUP_C |
2
|
89 /* now setup montgomery */ |
|
90 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { |
|
91 goto __M; |
|
92 } |
142
|
93 #else |
|
94 err = MP_VAL; |
|
95 goto __M; |
|
96 #endif |
2
|
97 |
|
98 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ |
142
|
99 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C |
2
|
100 if (((P->used * 2 + 1) < MP_WARRAY) && |
|
101 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { |
|
102 redux = fast_mp_montgomery_reduce; |
142
|
103 } else |
|
104 #endif |
|
105 { |
|
106 #ifdef BN_MP_MONTGOMERY_REDUCE_C |
2
|
107 /* use slower baseline Montgomery method */ |
|
108 redux = mp_montgomery_reduce; |
142
|
109 #else |
|
110 err = MP_VAL; |
|
111 goto __M; |
|
112 #endif |
2
|
113 } |
|
114 } else if (redmode == 1) { |
142
|
115 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C) |
2
|
116 /* setup DR reduction for moduli of the form B**k - b */ |
|
117 mp_dr_setup(P, &mp); |
|
118 redux = mp_dr_reduce; |
142
|
119 #else |
|
120 err = MP_VAL; |
|
121 goto __M; |
|
122 #endif |
2
|
123 } else { |
142
|
124 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) |
2
|
125 /* setup DR reduction for moduli of the form 2**k - b */ |
|
126 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { |
|
127 goto __M; |
|
128 } |
|
129 redux = mp_reduce_2k; |
142
|
130 #else |
|
131 err = MP_VAL; |
|
132 goto __M; |
|
133 #endif |
2
|
134 } |
|
135 |
|
136 /* setup result */ |
|
137 if ((err = mp_init (&res)) != MP_OKAY) { |
|
138 goto __M; |
|
139 } |
|
140 |
|
141 /* create M table |
|
142 * |
142
|
143 |
2
|
144 * |
|
145 * The first half of the table is not computed though accept for M[0] and M[1] |
|
146 */ |
|
147 |
|
148 if (redmode == 0) { |
142
|
149 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C |
2
|
150 /* now we need R mod m */ |
|
151 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { |
|
152 goto __RES; |
|
153 } |
142
|
154 #else |
|
155 err = MP_VAL; |
|
156 goto __RES; |
|
157 #endif |
2
|
158 |
|
159 /* now set M[1] to G * R mod m */ |
|
160 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { |
|
161 goto __RES; |
|
162 } |
|
163 } else { |
|
164 mp_set(&res, 1); |
|
165 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { |
|
166 goto __RES; |
|
167 } |
|
168 } |
|
169 |
|
170 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ |
|
171 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { |
|
172 goto __RES; |
|
173 } |
|
174 |
|
175 for (x = 0; x < (winsize - 1); x++) { |
|
176 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { |
|
177 goto __RES; |
|
178 } |
|
179 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { |
|
180 goto __RES; |
|
181 } |
|
182 } |
|
183 |
|
184 /* create upper table */ |
|
185 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { |
|
186 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { |
|
187 goto __RES; |
|
188 } |
|
189 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { |
|
190 goto __RES; |
|
191 } |
|
192 } |
|
193 |
|
194 /* set initial mode and bit cnt */ |
|
195 mode = 0; |
|
196 bitcnt = 1; |
|
197 buf = 0; |
|
198 digidx = X->used - 1; |
|
199 bitcpy = 0; |
|
200 bitbuf = 0; |
|
201 |
|
202 for (;;) { |
|
203 /* grab next digit as required */ |
|
204 if (--bitcnt == 0) { |
|
205 /* if digidx == -1 we are out of digits so break */ |
|
206 if (digidx == -1) { |
|
207 break; |
|
208 } |
|
209 /* read next digit and reset bitcnt */ |
|
210 buf = X->dp[digidx--]; |
|
211 bitcnt = (int)DIGIT_BIT; |
|
212 } |
|
213 |
|
214 /* grab the next msb from the exponent */ |
|
215 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1; |
|
216 buf <<= (mp_digit)1; |
|
217 |
|
218 /* if the bit is zero and mode == 0 then we ignore it |
|
219 * These represent the leading zero bits before the first 1 bit |
|
220 * in the exponent. Technically this opt is not required but it |
|
221 * does lower the # of trivial squaring/reductions used |
|
222 */ |
|
223 if (mode == 0 && y == 0) { |
|
224 continue; |
|
225 } |
|
226 |
|
227 /* if the bit is zero and mode == 1 then we square */ |
|
228 if (mode == 1 && y == 0) { |
|
229 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { |
|
230 goto __RES; |
|
231 } |
|
232 if ((err = redux (&res, P, mp)) != MP_OKAY) { |
|
233 goto __RES; |
|
234 } |
|
235 continue; |
|
236 } |
|
237 |
|
238 /* else we add it to the window */ |
|
239 bitbuf |= (y << (winsize - ++bitcpy)); |
|
240 mode = 2; |
|
241 |
|
242 if (bitcpy == winsize) { |
|
243 /* ok window is filled so square as required and multiply */ |
|
244 /* square first */ |
|
245 for (x = 0; x < winsize; x++) { |
|
246 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { |
|
247 goto __RES; |
|
248 } |
|
249 if ((err = redux (&res, P, mp)) != MP_OKAY) { |
|
250 goto __RES; |
|
251 } |
|
252 } |
|
253 |
|
254 /* then multiply */ |
|
255 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { |
|
256 goto __RES; |
|
257 } |
|
258 if ((err = redux (&res, P, mp)) != MP_OKAY) { |
|
259 goto __RES; |
|
260 } |
|
261 |
|
262 /* empty window and reset */ |
|
263 bitcpy = 0; |
|
264 bitbuf = 0; |
|
265 mode = 1; |
|
266 } |
|
267 } |
|
268 |
|
269 /* if bits remain then square/multiply */ |
|
270 if (mode == 2 && bitcpy > 0) { |
|
271 /* square then multiply if the bit is set */ |
|
272 for (x = 0; x < bitcpy; x++) { |
|
273 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { |
|
274 goto __RES; |
|
275 } |
|
276 if ((err = redux (&res, P, mp)) != MP_OKAY) { |
|
277 goto __RES; |
|
278 } |
|
279 |
|
280 /* get next bit of the window */ |
|
281 bitbuf <<= 1; |
|
282 if ((bitbuf & (1 << winsize)) != 0) { |
|
283 /* then multiply */ |
|
284 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { |
|
285 goto __RES; |
|
286 } |
|
287 if ((err = redux (&res, P, mp)) != MP_OKAY) { |
|
288 goto __RES; |
|
289 } |
|
290 } |
|
291 } |
|
292 } |
|
293 |
|
294 if (redmode == 0) { |
|
295 /* fixup result if Montgomery reduction is used |
|
296 * recall that any value in a Montgomery system is |
|
297 * actually multiplied by R mod n. So we have |
|
298 * to reduce one more time to cancel out the factor |
|
299 * of R. |
|
300 */ |
142
|
301 if ((err = redux(&res, P, mp)) != MP_OKAY) { |
2
|
302 goto __RES; |
|
303 } |
|
304 } |
|
305 |
|
306 /* swap res with Y */ |
|
307 mp_exch (&res, Y); |
|
308 err = MP_OKAY; |
|
309 __RES:mp_clear (&res); |
|
310 __M: |
|
311 mp_clear(&M[1]); |
|
312 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { |
|
313 mp_clear (&M[x]); |
|
314 } |
|
315 return err; |
|
316 } |
142
|
317 #endif |
|
318 |