comparison tomsfastmath/demo/test.c @ 643:a362b62d38b2 dropbear-tfm

Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a with Makefile.in renamed
author Matt Johnston <matt@ucc.asn.au>
date Wed, 23 Nov 2011 18:10:20 +0700
parents
children
comparison
equal deleted inserted replaced
642:33fd2f3499d2 643:a362b62d38b2
1 /* TFM demo program */
2 #include <tfm.h>
3
4 void draw(fp_int *a)
5 {
6 int x;
7 printf("%d, %d, ", a->used, a->sign);
8 for (x = a->used - 1; x >= 0; x--) {
9 printf("%08lx ", a->dp[x]);
10 }
11 printf("\n");
12 }
13
14 int myrng(unsigned char *dst, int len, void *dat)
15 {
16 int x;
17 for (x = 0; x < len; x++) dst[x] = rand() & 0xFF;
18 return len;
19 }
20
21 /* RDTSC from Scott Duplichan */
22 static ulong64 TIMFUNC (void)
23 {
24 #if defined __GNUC__
25 #if defined(INTEL_CC)
26 ulong64 a;
27 asm ("rdtsc":"=A"(a));
28 return a;
29 #elif defined(__i386__) || defined(__x86_64__)
30 ulong64 a;
31 __asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx");
32 return a;
33 #elif defined(TFM_PPC32)
34 unsigned long a, b;
35 __asm__ __volatile__ ("mftbu %1 \nmftb %0\n":"=r"(a), "=r"(b));
36 return (((ulong64)b) << 32ULL) | ((ulong64)a);
37 #elif defined(TFM_AVR32)
38 FILE *in;
39 char buf[20];
40 in = fopen("/sys/devices/system/cpu/cpu0/pccycles", "r");
41 fgets(buf, 20, in);
42 fclose(in);
43 return strtoul(buf, NULL, 10);
44 #else /* gcc-IA64 version */
45 unsigned long result;
46 __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
47 while (__builtin_expect ((int) result == -1, 0))
48 __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
49 return result;
50 #endif
51
52 // Microsoft and Intel Windows compilers
53 #elif defined _M_IX86
54 __asm rdtsc
55 #elif defined _M_AMD64
56 return __rdtsc ();
57 #elif defined _M_IA64
58 #if defined __INTEL_COMPILER
59 #include <ia64intrin.h>
60 #endif
61 return __getReg (3116);
62 #else
63 #error need rdtsc function for this build
64 #endif
65 }
66
67 char cmd[4096], buf[4096];
68
69 int main(void)
70 {
71 fp_int a,b,c,d,e,f;
72 fp_digit fp;
73 int n, err;
74 unsigned long expt_n, add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, inv_n,
75 div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n, t, cnt, rr, ix;
76 ulong64 t1, t2;
77
78 srand(time(NULL));
79 printf("TFM Ident string:\n%s\n\n", fp_ident());
80 fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f);
81 fp_zero(&a); draw(&a);
82
83 /* test set and simple shifts */
84 printf("Testing mul/div 2\n");
85 fp_set(&a, 1); draw(&a);
86 for (n = 0; n <= DIGIT_BIT; n++) {
87 fp_mul_2(&a, &a); printf("(%d) ", fp_count_bits(&a));
88 draw(&a);
89
90 }
91 for (n = 0; n <= (DIGIT_BIT + 1); n++) {
92 fp_div_2(&a, &a);
93 draw(&a);
94 }
95 fp_set(&a, 1);
96
97 /* test lshd/rshd */
98 printf("testing lshd/rshd\n");
99 fp_lshd(&a, 3); draw(&a);
100 fp_rshd(&a, 3); draw(&a);
101
102 /* test more complicated shifts */
103 printf("Testing mul/div 2d\n");
104 fp_mul_2d(&a, DIGIT_BIT/2, &a); draw(&a);
105 fp_div_2d(&a, DIGIT_BIT/2, &a, NULL); draw(&a);
106
107 fp_mul_2d(&a, DIGIT_BIT + DIGIT_BIT/2, &a); draw(&a);
108 fp_div_2d(&a, DIGIT_BIT + DIGIT_BIT/2, &a, NULL); draw(&a);
109
110 /* test neg/abs */
111 printf("testing neg/abs\n");
112 fp_neg(&a, &a); draw(&a);
113 fp_neg(&a, &a); draw(&a);
114 fp_neg(&a, &a); draw(&a);
115 fp_abs(&a, &a); draw(&a);
116
117 /* test comparisons */
118 fp_set(&b, 3);
119 fp_set(&c, 4); fp_neg(&c, &c);
120 fp_set(&d, 1);
121 printf("Testing compares\n%d, %d, %d, %d\n", fp_cmp(&a, &b), fp_cmp(&a, &c), fp_cmp(&a, &d), fp_cmp(&b, &c));
122
123 /* test add/sub */
124 printf("Testing add/sub \n");
125 fp_set(&a, ((fp_digit)1)<<(DIGIT_BIT-1)); draw(&a);
126 fp_set(&b, ((fp_digit)1)<<(DIGIT_BIT-2));
127 fp_add(&a, &b, &a); draw(&a);
128 fp_add(&a, &b, &a); draw(&a);
129 fp_add(&a, &b, &a); draw(&a);
130 printf("sub...\n");
131 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
132 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
133 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
134 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
135 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
136 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
137 fp_read_radix(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000000000000000000001", 16); draw(&a);
138 fp_sub_d(&a, 3, &b); draw(&b);
139 fp_read_radix(&a, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFE", 16);
140 printf("cmp returns: %d, ", fp_cmp(&a, &b)); fp_sub(&a, &b, &a); draw(&a);
141
142 /* test mul_d */
143 printf("Testing mul_d and div_d\n");
144 fp_set(&a, 1);
145 fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a);
146 fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a);
147 fp_mul_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a); draw(&a);
148 printf("div_d\n");
149 fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a);
150 fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a);
151 fp_div_d(&a, ((fp_digit)1)<<(DIGIT_BIT/2), &a, NULL); draw(&a);
152
153 /* testing read radix */
154 printf("Testing read_radix\n");
155 fp_read_radix(&a, "123456789012345678901234567890", 16); draw(&a);
156
157 #if 0
158 /* test mont */
159 printf("Montgomery test #1\n");
160 fp_set(&a, 0x1234567ULL);
161 fp_montgomery_setup(&a, &fp);
162 fp_montgomery_calc_normalization(&b, &a);
163
164 fp_read_radix(&d, "123456789123", 16);
165 for (n = 0; n < 1000000; n++) {
166 fp_add_d(&d, 1, &d); fp_sqrmod(&d, &a, &d);
167 fp_mul(&d, &b, &c);
168 fp_montgomery_reduce(&c, &a, fp);
169 if (fp_cmp(&c, &d) != FP_EQ) {
170 printf("Failed mont %d\n", n);
171 draw(&a);
172 draw(&d);
173 draw(&c);
174 return EXIT_FAILURE;
175 }
176 }
177 printf("Passed.\n");
178
179 printf("Montgomery test #2\n");
180 fp_set(&a, 0x1234567ULL);
181 fp_lshd(&a, 4);
182 fp_add_d(&a, 1, &a);
183 fp_montgomery_setup(&a, &fp);
184 fp_montgomery_calc_normalization(&b, &a);
185
186 fp_read_radix(&d, "123456789123", 16);
187 for (n = 0; n < 1000000; n++) {
188 fp_add_d(&d, 1, &d); fp_sqrmod(&d, &a, &d);
189 fp_mul(&d, &b, &c);
190 fp_montgomery_reduce(&c, &a, fp);
191 if (fp_cmp(&c, &d) != FP_EQ) {
192 printf("Failed mont %d\n", n);
193 draw(&a);
194 draw(&d);
195 draw(&c);
196 return EXIT_FAILURE;
197 }
198 }
199 printf("Passed.\n");
200
201 /* test for size */
202 for (ix = 8*DIGIT_BIT; ix < 10*DIGIT_BIT; ix++) {
203 printf("Testing (not safe-prime): %9lu bits \r", ix); fflush(stdout);
204 err = fp_prime_random_ex(&a, 8, ix, (rand()&1)?TFM_PRIME_2MSB_OFF:TFM_PRIME_2MSB_ON, myrng, NULL);
205 if (err != FP_OKAY) {
206 printf("failed with err code %d\n", err);
207 return EXIT_FAILURE;
208 }
209 if ((unsigned long)fp_count_bits(&a) != ix) {
210 printf("Prime is %d not %lu bits!!!\n", fp_count_bits(&a), ix);
211 return EXIT_FAILURE;
212 }
213 }
214 printf("\n\n");
215 #endif
216
217 #ifdef TESTING
218 goto testing;
219 #endif
220
221 #if 1
222
223 t1 = TIMFUNC();
224 sleep(1);
225 printf("Ticks per second: %llu\n", TIMFUNC() - t1);
226
227 goto multtime;
228 /* do some timings... */
229 printf("Addition:\n");
230 for (t = 2; t <= FP_SIZE/2; t += 2) {
231 fp_zero(&a);
232 fp_zero(&b);
233 fp_zero(&c);
234 for (ix = 0; ix < t; ix++) {
235 a.dp[ix] = ix;
236 b.dp[ix] = ix;
237 }
238 a.used = t;
239 b.used = t;
240 t2 = -1;
241 for (ix = 0; ix < 25000; ++ix) {
242 t1 = TIMFUNC();
243 fp_add(&a, &b, &c); fp_add(&a, &b, &c);
244 fp_add(&a, &b, &c); fp_add(&a, &b, &c);
245 fp_add(&a, &b, &c); fp_add(&a, &b, &c);
246 fp_add(&a, &b, &c); fp_add(&a, &b, &c);
247 t2 = (TIMFUNC() - t1)>>3;
248 if (t1<t2) { --ix; t2 = t1; }
249 }
250 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
251 }
252 multtime:
253 printf("Multiplication:\n");
254 for (t = 2; t < FP_SIZE/2; t += 2) {
255 fp_zero(&a);
256 fp_zero(&b);
257 fp_zero(&c);
258 for (ix = 0; ix < t; ix++) {
259 a.dp[ix] = ix;
260 b.dp[ix] = ix;
261 }
262 a.used = t;
263 b.used = t;
264 t2 = -1;
265 for (ix = 0; ix < 100; ++ix) {
266 t1 = TIMFUNC();
267 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
268 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
269 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
270 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
271 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
272 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
273 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
274 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
275 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
276 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
277 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
278 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
279 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
280 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
281 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
282 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
283 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
284 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
285 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
286 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
287 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
288 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
289 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
290 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
291 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
292 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
293 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
294 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
295 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
296 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
297 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
298 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
299 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
300 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
301 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
302 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
303 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
304 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
305 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
306 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
307 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
308 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
309 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
310 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
311 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
312 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
313 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
314 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
315 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
316 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
317 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
318 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
319 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
320 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
321 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
322 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
323 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
324 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
325 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
326 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
327 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
328 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
329 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
330 fp_mul(&a, &b, &c); fp_mul(&a, &b, &c);
331 t2 = (TIMFUNC() - t1)>>7;
332 if (t1<t2) { --ix; t2 = t1; }
333 }
334 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
335 }
336 //#else
337 sqrtime:
338 printf("Squaring:\n");
339 for (t = 2; t < FP_SIZE/2; t += 2) {
340 fp_zero(&a);
341 fp_zero(&b);
342 for (ix = 0; ix < t; ix++) {
343 a.dp[ix] = ix;
344 }
345 a.used = t;
346 t2 = -1;
347 for (ix = 0; ix < 100; ++ix) {
348 t1 = TIMFUNC();
349 fp_sqr(&a, &b); fp_sqr(&a, &b);
350 fp_sqr(&a, &b); fp_sqr(&a, &b);
351 fp_sqr(&a, &b); fp_sqr(&a, &b);
352 fp_sqr(&a, &b); fp_sqr(&a, &b);
353 fp_sqr(&a, &b); fp_sqr(&a, &b);
354 fp_sqr(&a, &b); fp_sqr(&a, &b);
355 fp_sqr(&a, &b); fp_sqr(&a, &b);
356 fp_sqr(&a, &b); fp_sqr(&a, &b);
357 fp_sqr(&a, &b); fp_sqr(&a, &b);
358 fp_sqr(&a, &b); fp_sqr(&a, &b);
359 fp_sqr(&a, &b); fp_sqr(&a, &b);
360 fp_sqr(&a, &b); fp_sqr(&a, &b);
361 fp_sqr(&a, &b); fp_sqr(&a, &b);
362 fp_sqr(&a, &b); fp_sqr(&a, &b);
363 fp_sqr(&a, &b); fp_sqr(&a, &b);
364 fp_sqr(&a, &b); fp_sqr(&a, &b);
365 fp_sqr(&a, &b); fp_sqr(&a, &b);
366 fp_sqr(&a, &b); fp_sqr(&a, &b);
367 fp_sqr(&a, &b); fp_sqr(&a, &b);
368 fp_sqr(&a, &b); fp_sqr(&a, &b);
369 fp_sqr(&a, &b); fp_sqr(&a, &b);
370 fp_sqr(&a, &b); fp_sqr(&a, &b);
371 fp_sqr(&a, &b); fp_sqr(&a, &b);
372 fp_sqr(&a, &b); fp_sqr(&a, &b);
373 fp_sqr(&a, &b); fp_sqr(&a, &b);
374 fp_sqr(&a, &b); fp_sqr(&a, &b);
375 fp_sqr(&a, &b); fp_sqr(&a, &b);
376 fp_sqr(&a, &b); fp_sqr(&a, &b);
377 fp_sqr(&a, &b); fp_sqr(&a, &b);
378 fp_sqr(&a, &b); fp_sqr(&a, &b);
379 fp_sqr(&a, &b); fp_sqr(&a, &b);
380 fp_sqr(&a, &b); fp_sqr(&a, &b);
381 fp_sqr(&a, &b); fp_sqr(&a, &b);
382 fp_sqr(&a, &b); fp_sqr(&a, &b);
383 fp_sqr(&a, &b); fp_sqr(&a, &b);
384 fp_sqr(&a, &b); fp_sqr(&a, &b);
385 fp_sqr(&a, &b); fp_sqr(&a, &b);
386 fp_sqr(&a, &b); fp_sqr(&a, &b);
387 fp_sqr(&a, &b); fp_sqr(&a, &b);
388 fp_sqr(&a, &b); fp_sqr(&a, &b);
389 fp_sqr(&a, &b); fp_sqr(&a, &b);
390 fp_sqr(&a, &b); fp_sqr(&a, &b);
391 fp_sqr(&a, &b); fp_sqr(&a, &b);
392 fp_sqr(&a, &b); fp_sqr(&a, &b);
393 fp_sqr(&a, &b); fp_sqr(&a, &b);
394 fp_sqr(&a, &b); fp_sqr(&a, &b);
395 fp_sqr(&a, &b); fp_sqr(&a, &b);
396 fp_sqr(&a, &b); fp_sqr(&a, &b);
397 fp_sqr(&a, &b); fp_sqr(&a, &b);
398 fp_sqr(&a, &b); fp_sqr(&a, &b);
399 fp_sqr(&a, &b); fp_sqr(&a, &b);
400 fp_sqr(&a, &b); fp_sqr(&a, &b);
401 fp_sqr(&a, &b); fp_sqr(&a, &b);
402 fp_sqr(&a, &b); fp_sqr(&a, &b);
403 fp_sqr(&a, &b); fp_sqr(&a, &b);
404 fp_sqr(&a, &b); fp_sqr(&a, &b);
405 fp_sqr(&a, &b); fp_sqr(&a, &b);
406 fp_sqr(&a, &b); fp_sqr(&a, &b);
407 fp_sqr(&a, &b); fp_sqr(&a, &b);
408 fp_sqr(&a, &b); fp_sqr(&a, &b);
409 fp_sqr(&a, &b); fp_sqr(&a, &b);
410 fp_sqr(&a, &b); fp_sqr(&a, &b);
411 fp_sqr(&a, &b); fp_sqr(&a, &b);
412 fp_sqr(&a, &b); fp_sqr(&a, &b);
413 t2 = (TIMFUNC() - t1)>>7;
414 if (t1<t2) { --ix; t2 = t1; }
415 }
416 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
417 }
418 invmodtime:
419 printf("Invmod:\n");
420 for (t = 2; t < FP_SIZE/2; t += 2) {
421 fp_zero(&a);
422 for (ix = 0; ix < t; ix++) {
423 a.dp[ix] = ix | 1;
424 }
425 a.used = t;
426 fp_zero(&b);
427 for (ix = 0; ix < t; ix++) {
428 b.dp[ix] = rand();
429 }
430 b.used = t;
431 fp_clamp(&b);
432 fp_zero(&c);
433 t2 = -1;
434 for (ix = 0; ix < 100; ++ix) {
435 t1 = TIMFUNC();
436 fp_invmod(&b, &a, &c);
437 fp_invmod(&b, &a, &c);
438 fp_invmod(&b, &a, &c);
439 fp_invmod(&b, &a, &c);
440 fp_invmod(&b, &a, &c);
441 fp_invmod(&b, &a, &c);
442 fp_invmod(&b, &a, &c);
443 fp_invmod(&b, &a, &c);
444 fp_invmod(&b, &a, &c);
445 fp_invmod(&b, &a, &c);
446 fp_invmod(&b, &a, &c);
447 fp_invmod(&b, &a, &c);
448 fp_invmod(&b, &a, &c);
449 fp_invmod(&b, &a, &c);
450 fp_invmod(&b, &a, &c);
451 fp_invmod(&b, &a, &c);
452 fp_invmod(&b, &a, &c);
453 fp_invmod(&b, &a, &c);
454 fp_invmod(&b, &a, &c);
455 fp_invmod(&b, &a, &c);
456 fp_invmod(&b, &a, &c);
457 fp_invmod(&b, &a, &c);
458 fp_invmod(&b, &a, &c);
459 fp_invmod(&b, &a, &c);
460 fp_invmod(&b, &a, &c);
461 fp_invmod(&b, &a, &c);
462 fp_invmod(&b, &a, &c);
463 fp_invmod(&b, &a, &c);
464 fp_invmod(&b, &a, &c);
465 fp_invmod(&b, &a, &c);
466 fp_invmod(&b, &a, &c);
467 fp_invmod(&b, &a, &c);
468 fp_invmod(&b, &a, &c);
469 fp_invmod(&b, &a, &c);
470 fp_invmod(&b, &a, &c);
471 fp_invmod(&b, &a, &c);
472 fp_invmod(&b, &a, &c);
473 fp_invmod(&b, &a, &c);
474 fp_invmod(&b, &a, &c);
475 fp_invmod(&b, &a, &c);
476 fp_invmod(&b, &a, &c);
477 fp_invmod(&b, &a, &c);
478 fp_invmod(&b, &a, &c);
479 fp_invmod(&b, &a, &c);
480 fp_invmod(&b, &a, &c);
481 fp_invmod(&b, &a, &c);
482 fp_invmod(&b, &a, &c);
483 fp_invmod(&b, &a, &c);
484 fp_invmod(&b, &a, &c);
485 fp_invmod(&b, &a, &c);
486 fp_invmod(&b, &a, &c);
487 fp_invmod(&b, &a, &c);
488 fp_invmod(&b, &a, &c);
489 fp_invmod(&b, &a, &c);
490 fp_invmod(&b, &a, &c);
491 fp_invmod(&b, &a, &c);
492 fp_invmod(&b, &a, &c);
493 fp_invmod(&b, &a, &c);
494 fp_invmod(&b, &a, &c);
495 fp_invmod(&b, &a, &c);
496 fp_invmod(&b, &a, &c);
497 fp_invmod(&b, &a, &c);
498 fp_invmod(&b, &a, &c);
499 fp_invmod(&b, &a, &c);
500 t2 = (TIMFUNC() - t1)>>6;
501 if (t1<t2) { --ix; t2 = t1; }
502 }
503 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
504 }
505 //#else
506 monttime:
507 printf("Montgomery:\n");
508 for (t = 2; t <= (FP_SIZE/2)-4; t += 2) {
509 // printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
510 fp_zero(&a);
511 for (ix = 0; ix < t; ix++) {
512 a.dp[ix] = ix | 1;
513 }
514 a.used = t;
515
516 fp_montgomery_setup(&a, &fp);
517 fp_sub_d(&a, 3, &b);
518 fp_sqr(&b, &b);
519 fp_copy(&b, &c);
520 fp_copy(&b, &d);
521
522 t2 = -1;
523 for (ix = 0; ix < 100; ++ix) {
524 t1 = TIMFUNC();
525 fp_montgomery_reduce(&c, &a, &fp);
526 fp_montgomery_reduce(&d, &a, &fp);
527 fp_montgomery_reduce(&c, &a, &fp);
528 fp_montgomery_reduce(&d, &a, &fp);
529 fp_montgomery_reduce(&c, &a, &fp);
530 fp_montgomery_reduce(&d, &a, &fp);
531 fp_montgomery_reduce(&c, &a, &fp);
532 fp_montgomery_reduce(&d, &a, &fp);
533 fp_montgomery_reduce(&c, &a, &fp);
534 fp_montgomery_reduce(&d, &a, &fp);
535 fp_montgomery_reduce(&c, &a, &fp);
536 fp_montgomery_reduce(&d, &a, &fp);
537 fp_montgomery_reduce(&c, &a, &fp);
538 fp_montgomery_reduce(&d, &a, &fp);
539 fp_montgomery_reduce(&c, &a, &fp);
540 fp_montgomery_reduce(&d, &a, &fp);
541 fp_montgomery_reduce(&c, &a, &fp);
542 fp_montgomery_reduce(&d, &a, &fp);
543 fp_montgomery_reduce(&c, &a, &fp);
544 fp_montgomery_reduce(&d, &a, &fp);
545 fp_montgomery_reduce(&c, &a, &fp);
546 fp_montgomery_reduce(&d, &a, &fp);
547 fp_montgomery_reduce(&c, &a, &fp);
548 fp_montgomery_reduce(&d, &a, &fp);
549 fp_montgomery_reduce(&c, &a, &fp);
550 fp_montgomery_reduce(&d, &a, &fp);
551 fp_montgomery_reduce(&c, &a, &fp);
552 fp_montgomery_reduce(&d, &a, &fp);
553 fp_montgomery_reduce(&c, &a, &fp);
554 fp_montgomery_reduce(&d, &a, &fp);
555 fp_montgomery_reduce(&c, &a, &fp);
556 fp_montgomery_reduce(&d, &a, &fp);
557 fp_montgomery_reduce(&c, &a, &fp);
558 fp_montgomery_reduce(&d, &a, &fp);
559 fp_montgomery_reduce(&c, &a, &fp);
560 fp_montgomery_reduce(&d, &a, &fp);
561 fp_montgomery_reduce(&c, &a, &fp);
562 fp_montgomery_reduce(&d, &a, &fp);
563 fp_montgomery_reduce(&c, &a, &fp);
564 fp_montgomery_reduce(&d, &a, &fp);
565 fp_montgomery_reduce(&c, &a, &fp);
566 fp_montgomery_reduce(&d, &a, &fp);
567 fp_montgomery_reduce(&c, &a, &fp);
568 fp_montgomery_reduce(&d, &a, &fp);
569 fp_montgomery_reduce(&c, &a, &fp);
570 fp_montgomery_reduce(&d, &a, &fp);
571 fp_montgomery_reduce(&c, &a, &fp);
572 fp_montgomery_reduce(&d, &a, &fp);
573 fp_montgomery_reduce(&c, &a, &fp);
574 fp_montgomery_reduce(&d, &a, &fp);
575 fp_montgomery_reduce(&c, &a, &fp);
576 fp_montgomery_reduce(&d, &a, &fp);
577 fp_montgomery_reduce(&c, &a, &fp);
578 fp_montgomery_reduce(&d, &a, &fp);
579 fp_montgomery_reduce(&c, &a, &fp);
580 fp_montgomery_reduce(&d, &a, &fp);
581 fp_montgomery_reduce(&c, &a, &fp);
582 fp_montgomery_reduce(&d, &a, &fp);
583 fp_montgomery_reduce(&c, &a, &fp);
584 fp_montgomery_reduce(&d, &a, &fp);
585 fp_montgomery_reduce(&c, &a, &fp);
586 fp_montgomery_reduce(&d, &a, &fp);
587 fp_montgomery_reduce(&c, &a, &fp);
588 fp_montgomery_reduce(&d, &a, &fp);
589 t2 = (TIMFUNC() - t1)>>6;
590 fp_copy(&b, &c);
591 fp_copy(&b, &d);
592 if (t1<t2) { --ix; t2 = t1; }
593 }
594 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
595 }
596 //#else
597 expttime:
598 printf("Exptmod:\n");
599
600 for (t = 512/DIGIT_BIT; t <= (FP_SIZE/2)-2; t += 256/DIGIT_BIT) {
601 fp_zero(&a);
602 fp_zero(&b);
603 fp_zero(&c);
604 for (ix = 0; ix < t; ix++) {
605 a.dp[ix] = ix+1;
606 b.dp[ix] = (fp_digit)rand() * (fp_digit)rand();
607 c.dp[ix] = ix;
608 }
609 a.used = t;
610 b.used = t;
611 c.used = t;
612
613 t2 = -1;
614 for (ix = 0; ix < 500; ++ix) {
615 t1 = TIMFUNC();
616 fp_exptmod(&c, &b, &a, &d);
617 fp_exptmod(&c, &b, &a, &d);
618 t2 = (TIMFUNC() - t1)>>1;
619 fp_copy(&b, &c);
620 fp_copy(&b, &d);
621 if (t1<t2) { t2 = t1; --ix; }
622 }
623 printf("%5lu-bit: %9llu\n", t * DIGIT_BIT, t2);
624 }
625 return 0;
626 #endif
627
628 return 0;
629 testing:
630
631 fp_zero(&b); fp_zero(&c); fp_zero(&d); fp_zero(&e); fp_zero(&f); fp_zero(&a);
632
633
634 div2_n = mul2_n = inv_n = expt_n = lcm_n = gcd_n = add_n =
635 sub_n = mul_n = div_n = sqr_n = mul2d_n = div2d_n = cnt = add_d_n = sub_d_n= mul_d_n = 0;
636
637 for (;;) {
638 printf("%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu/%4lu ", add_n, sub_n, mul_n, div_n, sqr_n, mul2d_n, div2d_n, gcd_n, lcm_n, expt_n, inv_n, div2_n, mul2_n, add_d_n, sub_d_n, mul_d_n);
639 fgets(cmd, 4095, stdin);
640 cmd[strlen(cmd)-1] = 0;
641 printf("%s ]\r",cmd); fflush(stdout);
642 if (!strcmp(cmd, "mul2d")) { ++mul2d_n;
643 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
644 fgets(buf, 4095, stdin); sscanf(buf, "%lu", &rr);
645 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
646
647 fp_mul_2d(&a, rr, &a);
648 a.sign = b.sign;
649 if (fp_cmp(&a, &b) != FP_EQ) {
650 printf("mul2d failed, rr == %lu\n",rr);
651 draw(&a);
652 draw(&b);
653 return 0;
654 }
655 } else if (!strcmp(cmd, "div2d")) { ++div2d_n;
656 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
657 fgets(buf, 4095, stdin); sscanf(buf, "%lu", &rr);
658 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
659
660 fp_div_2d(&a, rr, &a, &e);
661 a.sign = b.sign;
662 if (a.used == b.used && a.used == 0) { a.sign = b.sign = FP_ZPOS; }
663 if (fp_cmp(&a, &b) != FP_EQ) {
664 printf("div2d failed, rr == %lu\n",rr);
665 draw(&a);
666 draw(&b);
667 return 0;
668 }
669 } else if (!strcmp(cmd, "add")) { ++add_n;
670 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
671 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
672 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
673 fp_copy(&a, &d);
674 fp_add(&d, &b, &d);
675 if (fp_cmp(&c, &d) != FP_EQ) {
676 printf("add %lu failure!\n", add_n);
677 draw(&a);draw(&b);draw(&c);draw(&d);
678 return 0;
679 }
680
681 /* test the sign/unsigned storage functions */
682
683 rr = fp_signed_bin_size(&c);
684 fp_to_signed_bin(&c, (unsigned char *)cmd);
685 memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
686 fp_read_signed_bin(&d, (unsigned char *)cmd, rr);
687 if (fp_cmp(&c, &d) != FP_EQ) {
688 printf("fp_signed_bin failure!\n");
689 draw(&c);
690 draw(&d);
691 return 0;
692 }
693
694 rr = fp_unsigned_bin_size(&c);
695 fp_to_unsigned_bin(&c, (unsigned char *)cmd);
696 memset(cmd+rr, rand()&255, sizeof(cmd)-rr);
697 fp_read_unsigned_bin(&d, (unsigned char *)cmd, rr);
698 if (fp_cmp_mag(&c, &d) != FP_EQ) {
699 printf("fp_unsigned_bin failure!\n");
700 draw(&c);
701 draw(&d);
702 return 0;
703 }
704
705 } else if (!strcmp(cmd, "sub")) { ++sub_n;
706 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
707 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
708 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
709 fp_copy(&a, &d);
710 fp_sub(&d, &b, &d);
711 if (fp_cmp(&c, &d) != FP_EQ) {
712 printf("sub %lu failure!\n", sub_n);
713 draw(&a);draw(&b);draw(&c);draw(&d);
714 return 0;
715 }
716 } else if (!strcmp(cmd, "mul")) {
717 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
718 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
719 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
720 //continue;
721 fp_copy(&a, &d);
722 fp_mul(&d, &b, &d); ++mul_n;
723 if (fp_cmp(&c, &d) != FP_EQ) {
724 printf("mul %lu failure!\n", mul_n);
725 draw(&a);draw(&b);draw(&c);draw(&d);
726 return 0;
727 }
728 } else if (!strcmp(cmd, "div")) {
729 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
730 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
731 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
732 fgets(buf, 4095, stdin); fp_read_radix(&d, buf, 64);
733 // continue;
734 fp_div(&a, &b, &e, &f); ++div_n;
735 if (fp_cmp(&c, &e) != FP_EQ || fp_cmp(&d, &f) != FP_EQ) {
736 printf("div %lu failure!\n", div_n);
737 draw(&a);draw(&b);draw(&c);draw(&d); draw(&e); draw(&f);
738 return 0;
739 }
740
741 } else if (!strcmp(cmd, "sqr")) {
742 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
743 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
744 // continue;
745 fp_copy(&a, &c);
746 fp_sqr(&c, &c); ++sqr_n;
747 if (fp_cmp(&b, &c) != FP_EQ) {
748 printf("sqr %lu failure!\n", sqr_n);
749 draw(&a);draw(&b);draw(&c);
750 return 0;
751 }
752 } else if (!strcmp(cmd, "gcd")) {
753 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
754 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
755 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
756 // continue;
757 fp_copy(&a, &d);
758 fp_gcd(&d, &b, &d); ++gcd_n;
759 d.sign = c.sign;
760 if (fp_cmp(&c, &d) != FP_EQ) {
761 printf("gcd %lu failure!\n", gcd_n);
762 draw(&a);draw(&b);draw(&c);draw(&d);
763 return 0;
764 }
765 } else if (!strcmp(cmd, "lcm")) {
766 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
767 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
768 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
769 //continue;
770 fp_copy(&a, &d);
771 fp_lcm(&d, &b, &d); ++lcm_n;
772 d.sign = c.sign;
773 if (fp_cmp(&c, &d) != FP_EQ) {
774 printf("lcm %lu failure!\n", lcm_n);
775 draw(&a);draw(&b);draw(&c);draw(&d);
776 return 0;
777 }
778 } else if (!strcmp(cmd, "expt")) {
779 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
780 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
781 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
782 fgets(buf, 4095, stdin); fp_read_radix(&d, buf, 64);
783 // continue;
784 fp_copy(&a, &e);
785 fp_exptmod(&e, &b, &c, &e); ++expt_n;
786 if (fp_cmp(&d, &e) != FP_EQ) {
787 printf("expt %lu failure!\n", expt_n);
788 draw(&a);draw(&b);draw(&c);draw(&d); draw(&e);
789 return 0;
790 }
791 } else if (!strcmp(cmd, "invmod")) {
792 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
793 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
794 fgets(buf, 4095, stdin); fp_read_radix(&c, buf, 64);
795 //continue;
796 fp_invmod(&a, &b, &d);
797 #if 1
798 fp_mulmod(&d,&a,&b,&e); ++inv_n;
799 if (fp_cmp_d(&e, 1) != FP_EQ) {
800 #else
801 if (fp_cmp(&d, &c) != FP_EQ) {
802 #endif
803 printf("inv [wrong value from MPI?!] failure\n");
804 draw(&a);draw(&b);draw(&c);draw(&d);
805 return 0;
806 }
807
808 } else if (!strcmp(cmd, "div2")) { ++div2_n;
809 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
810 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
811 fp_div_2(&a, &c);
812 if (fp_cmp(&c, &b) != FP_EQ) {
813 printf("div_2 %lu failure\n", div2_n);
814 draw(&a);
815 draw(&b);
816 draw(&c);
817 return 0;
818 }
819 } else if (!strcmp(cmd, "mul2")) { ++mul2_n;
820 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
821 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
822 fp_mul_2(&a, &c);
823 if (fp_cmp(&c, &b) != FP_EQ) {
824 printf("mul_2 %lu failure\n", mul2_n);
825 draw(&a);
826 draw(&b);
827 draw(&c);
828 return 0;
829 }
830 } else if (!strcmp(cmd, "add_d")) { ++add_d_n;
831 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
832 fgets(buf, 4095, stdin); sscanf(buf, "%lu", &ix);
833 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
834 fp_add_d(&a, ix, &c);
835 if (fp_cmp(&b, &c) != FP_EQ) {
836 printf("add_d %lu failure\n", add_d_n);
837 draw(&a);
838 draw(&b);
839 draw(&c);
840 printf("d == %lu\n", ix);
841 return 0;
842 }
843 } else if (!strcmp(cmd, "sub_d")) { ++sub_d_n;
844 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
845 fgets(buf, 4095, stdin); sscanf(buf, "%lu", &ix);
846 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
847 fp_sub_d(&a, ix, &c);
848 if (fp_cmp(&b, &c) != FP_EQ) {
849 printf("sub_d %lu failure\n", sub_d_n);
850 draw(&a);
851 draw(&b);
852 draw(&c);
853 printf("d == %lu\n", ix);
854 return 0;
855 }
856 } else if (!strcmp(cmd, "mul_d")) { ++mul_d_n;
857 fgets(buf, 4095, stdin); fp_read_radix(&a, buf, 64);
858 fgets(buf, 4095, stdin); sscanf(buf, "%lu", &ix);
859 fgets(buf, 4095, stdin); fp_read_radix(&b, buf, 64);
860 fp_mul_d(&a, ix, &c);
861 if (fp_cmp(&b, &c) != FP_EQ) {
862 printf("mul_d %lu failure\n", sub_d_n);
863 draw(&a);
864 draw(&b);
865 draw(&c);
866 printf("d == %lu\n", ix);
867 return 0;
868 }
869 }
870
871 }
872 }
873
874
875
876 /* $Source$ */
877 /* $Revision$ */
878 /* $Date$ */