Mercurial > dropbear
comparison libtommath/bn_mp_exptmod_fast.c @ 284:eed26cff980b
propagate from branch 'au.asn.ucc.matt.ltm.dropbear' (head 6c790cad5a7fa866ad062cb3a0c279f7ba788583)
to branch 'au.asn.ucc.matt.dropbear' (head fff0894a0399405a9410ea1c6d118f342cf2aa64)
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Wed, 08 Mar 2006 13:23:49 +0000 |
parents | |
children | cd14c94fe89c 5ff8218bcee9 |
comparison
equal
deleted
inserted
replaced
283:bd240aa12ba7 | 284:eed26cff980b |
---|---|
1 #include <tommath.h> | |
2 #ifdef BN_MP_EXPTMOD_FAST_C | |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | |
4 * | |
5 * LibTomMath is a library that provides multiple-precision | |
6 * integer arithmetic as well as number theoretic functionality. | |
7 * | |
8 * The library was designed directly after the MPI library by | |
9 * Michael Fromberger but has been written from scratch with | |
10 * additional optimizations in place. | |
11 * | |
12 * The library is free for all purposes without any express | |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org | |
16 */ | |
17 | |
18 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 | |
19 * | |
20 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. | |
21 * The value of k changes based on the size of the exponent. | |
22 * | |
23 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] | |
24 */ | |
25 | |
26 #ifdef MP_LOW_MEM | |
27 #define TAB_SIZE 32 | |
28 #else | |
29 #define TAB_SIZE 256 | |
30 #endif | |
31 | |
32 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) | |
33 { | |
34 mp_int M[TAB_SIZE], res; | |
35 mp_digit buf, mp; | |
36 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; | |
37 | |
38 /* use a pointer to the reduction algorithm. This allows us to use | |
39 * one of many reduction algorithms without modding the guts of | |
40 * the code with if statements everywhere. | |
41 */ | |
42 int (*redux)(mp_int*,mp_int*,mp_digit); | |
43 | |
44 /* find window size */ | |
45 x = mp_count_bits (X); | |
46 if (x <= 7) { | |
47 winsize = 2; | |
48 } else if (x <= 36) { | |
49 winsize = 3; | |
50 } else if (x <= 140) { | |
51 winsize = 4; | |
52 } else if (x <= 450) { | |
53 winsize = 5; | |
54 } else if (x <= 1303) { | |
55 winsize = 6; | |
56 } else if (x <= 3529) { | |
57 winsize = 7; | |
58 } else { | |
59 winsize = 8; | |
60 } | |
61 | |
62 #ifdef MP_LOW_MEM | |
63 if (winsize > 5) { | |
64 winsize = 5; | |
65 } | |
66 #endif | |
67 | |
68 /* init M array */ | |
69 /* init first cell */ | |
70 if ((err = mp_init(&M[1])) != MP_OKAY) { | |
71 return err; | |
72 } | |
73 | |
74 /* now init the second half of the array */ | |
75 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { | |
76 if ((err = mp_init(&M[x])) != MP_OKAY) { | |
77 for (y = 1<<(winsize-1); y < x; y++) { | |
78 mp_clear (&M[y]); | |
79 } | |
80 mp_clear(&M[1]); | |
81 return err; | |
82 } | |
83 } | |
84 | |
85 /* determine and setup reduction code */ | |
86 if (redmode == 0) { | |
87 #ifdef BN_MP_MONTGOMERY_SETUP_C | |
88 /* now setup montgomery */ | |
89 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { | |
90 goto LBL_M; | |
91 } | |
92 #else | |
93 err = MP_VAL; | |
94 goto LBL_M; | |
95 #endif | |
96 | |
97 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ | |
98 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C | |
99 if (((P->used * 2 + 1) < MP_WARRAY) && | |
100 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { | |
101 redux = fast_mp_montgomery_reduce; | |
102 } else | |
103 #endif | |
104 { | |
105 #ifdef BN_MP_MONTGOMERY_REDUCE_C | |
106 /* use slower baseline Montgomery method */ | |
107 redux = mp_montgomery_reduce; | |
108 #else | |
109 err = MP_VAL; | |
110 goto LBL_M; | |
111 #endif | |
112 } | |
113 } else if (redmode == 1) { | |
114 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C) | |
115 /* setup DR reduction for moduli of the form B**k - b */ | |
116 mp_dr_setup(P, &mp); | |
117 redux = mp_dr_reduce; | |
118 #else | |
119 err = MP_VAL; | |
120 goto LBL_M; | |
121 #endif | |
122 } else { | |
123 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) | |
124 /* setup DR reduction for moduli of the form 2**k - b */ | |
125 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { | |
126 goto LBL_M; | |
127 } | |
128 redux = mp_reduce_2k; | |
129 #else | |
130 err = MP_VAL; | |
131 goto LBL_M; | |
132 #endif | |
133 } | |
134 | |
135 /* setup result */ | |
136 if ((err = mp_init (&res)) != MP_OKAY) { | |
137 goto LBL_M; | |
138 } | |
139 | |
140 /* create M table | |
141 * | |
142 | |
143 * | |
144 * The first half of the table is not computed though accept for M[0] and M[1] | |
145 */ | |
146 | |
147 if (redmode == 0) { | |
148 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C | |
149 /* now we need R mod m */ | |
150 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { | |
151 goto LBL_RES; | |
152 } | |
153 #else | |
154 err = MP_VAL; | |
155 goto LBL_RES; | |
156 #endif | |
157 | |
158 /* now set M[1] to G * R mod m */ | |
159 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { | |
160 goto LBL_RES; | |
161 } | |
162 } else { | |
163 mp_set(&res, 1); | |
164 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { | |
165 goto LBL_RES; | |
166 } | |
167 } | |
168 | |
169 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ | |
170 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { | |
171 goto LBL_RES; | |
172 } | |
173 | |
174 for (x = 0; x < (winsize - 1); x++) { | |
175 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { | |
176 goto LBL_RES; | |
177 } | |
178 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { | |
179 goto LBL_RES; | |
180 } | |
181 } | |
182 | |
183 /* create upper table */ | |
184 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { | |
185 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { | |
186 goto LBL_RES; | |
187 } | |
188 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { | |
189 goto LBL_RES; | |
190 } | |
191 } | |
192 | |
193 /* set initial mode and bit cnt */ | |
194 mode = 0; | |
195 bitcnt = 1; | |
196 buf = 0; | |
197 digidx = X->used - 1; | |
198 bitcpy = 0; | |
199 bitbuf = 0; | |
200 | |
201 for (;;) { | |
202 /* grab next digit as required */ | |
203 if (--bitcnt == 0) { | |
204 /* if digidx == -1 we are out of digits so break */ | |
205 if (digidx == -1) { | |
206 break; | |
207 } | |
208 /* read next digit and reset bitcnt */ | |
209 buf = X->dp[digidx--]; | |
210 bitcnt = (int)DIGIT_BIT; | |
211 } | |
212 | |
213 /* grab the next msb from the exponent */ | |
214 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1; | |
215 buf <<= (mp_digit)1; | |
216 | |
217 /* if the bit is zero and mode == 0 then we ignore it | |
218 * These represent the leading zero bits before the first 1 bit | |
219 * in the exponent. Technically this opt is not required but it | |
220 * does lower the # of trivial squaring/reductions used | |
221 */ | |
222 if (mode == 0 && y == 0) { | |
223 continue; | |
224 } | |
225 | |
226 /* if the bit is zero and mode == 1 then we square */ | |
227 if (mode == 1 && y == 0) { | |
228 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { | |
229 goto LBL_RES; | |
230 } | |
231 if ((err = redux (&res, P, mp)) != MP_OKAY) { | |
232 goto LBL_RES; | |
233 } | |
234 continue; | |
235 } | |
236 | |
237 /* else we add it to the window */ | |
238 bitbuf |= (y << (winsize - ++bitcpy)); | |
239 mode = 2; | |
240 | |
241 if (bitcpy == winsize) { | |
242 /* ok window is filled so square as required and multiply */ | |
243 /* square first */ | |
244 for (x = 0; x < winsize; x++) { | |
245 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { | |
246 goto LBL_RES; | |
247 } | |
248 if ((err = redux (&res, P, mp)) != MP_OKAY) { | |
249 goto LBL_RES; | |
250 } | |
251 } | |
252 | |
253 /* then multiply */ | |
254 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { | |
255 goto LBL_RES; | |
256 } | |
257 if ((err = redux (&res, P, mp)) != MP_OKAY) { | |
258 goto LBL_RES; | |
259 } | |
260 | |
261 /* empty window and reset */ | |
262 bitcpy = 0; | |
263 bitbuf = 0; | |
264 mode = 1; | |
265 } | |
266 } | |
267 | |
268 /* if bits remain then square/multiply */ | |
269 if (mode == 2 && bitcpy > 0) { | |
270 /* square then multiply if the bit is set */ | |
271 for (x = 0; x < bitcpy; x++) { | |
272 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { | |
273 goto LBL_RES; | |
274 } | |
275 if ((err = redux (&res, P, mp)) != MP_OKAY) { | |
276 goto LBL_RES; | |
277 } | |
278 | |
279 /* get next bit of the window */ | |
280 bitbuf <<= 1; | |
281 if ((bitbuf & (1 << winsize)) != 0) { | |
282 /* then multiply */ | |
283 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { | |
284 goto LBL_RES; | |
285 } | |
286 if ((err = redux (&res, P, mp)) != MP_OKAY) { | |
287 goto LBL_RES; | |
288 } | |
289 } | |
290 } | |
291 } | |
292 | |
293 if (redmode == 0) { | |
294 /* fixup result if Montgomery reduction is used | |
295 * recall that any value in a Montgomery system is | |
296 * actually multiplied by R mod n. So we have | |
297 * to reduce one more time to cancel out the factor | |
298 * of R. | |
299 */ | |
300 if ((err = redux(&res, P, mp)) != MP_OKAY) { | |
301 goto LBL_RES; | |
302 } | |
303 } | |
304 | |
305 /* swap res with Y */ | |
306 mp_exch (&res, Y); | |
307 err = MP_OKAY; | |
308 LBL_RES:mp_clear (&res); | |
309 LBL_M: | |
310 mp_clear(&M[1]); | |
311 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { | |
312 mp_clear (&M[x]); | |
313 } | |
314 return err; | |
315 } | |
316 #endif | |
317 |