282
|
1 /* Tune the Karatsuba parameters |
|
2 * |
386
|
3 * Tom St Denis, [email protected] |
282
|
4 */ |
|
5 #include <tommath.h> |
|
6 #include <time.h> |
|
7 |
|
8 /* how many times todo each size mult. Depends on your computer. For slow computers |
|
9 * this can be low like 5 or 10. For fast [re: Athlon] should be 25 - 50 or so |
|
10 */ |
|
11 #define TIMES (1UL<<14UL) |
|
12 |
|
13 /* RDTSC from Scott Duplichan */ |
|
14 static ulong64 TIMFUNC (void) |
|
15 { |
|
16 #if defined __GNUC__ |
|
17 #if defined(__i386__) || defined(__x86_64__) |
|
18 unsigned long long a; |
|
19 __asm__ __volatile__ ("rdtsc\nmovl %%eax,%0\nmovl %%edx,4+%0\n"::"m"(a):"%eax","%edx"); |
|
20 return a; |
|
21 #else /* gcc-IA64 version */ |
|
22 unsigned long result; |
|
23 __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); |
|
24 while (__builtin_expect ((int) result == -1, 0)) |
|
25 __asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory"); |
|
26 return result; |
|
27 #endif |
|
28 |
|
29 // Microsoft and Intel Windows compilers |
|
30 #elif defined _M_IX86 |
|
31 __asm rdtsc |
|
32 #elif defined _M_AMD64 |
|
33 return __rdtsc (); |
|
34 #elif defined _M_IA64 |
|
35 #if defined __INTEL_COMPILER |
|
36 #include <ia64intrin.h> |
|
37 #endif |
|
38 return __getReg (3116); |
|
39 #else |
|
40 #error need rdtsc function for this build |
|
41 #endif |
|
42 } |
|
43 |
|
44 |
|
45 #ifndef X86_TIMER |
|
46 |
|
47 /* generic ISO C timer */ |
|
48 ulong64 LBL_T; |
|
49 void t_start(void) { LBL_T = TIMFUNC(); } |
|
50 ulong64 t_read(void) { return TIMFUNC() - LBL_T; } |
|
51 |
|
52 #else |
|
53 extern void t_start(void); |
|
54 extern ulong64 t_read(void); |
|
55 #endif |
|
56 |
|
57 ulong64 time_mult(int size, int s) |
|
58 { |
|
59 unsigned long x; |
|
60 mp_int a, b, c; |
|
61 ulong64 t1; |
|
62 |
|
63 mp_init (&a); |
|
64 mp_init (&b); |
|
65 mp_init (&c); |
|
66 |
|
67 mp_rand (&a, size); |
|
68 mp_rand (&b, size); |
|
69 |
|
70 if (s == 1) { |
|
71 KARATSUBA_MUL_CUTOFF = size; |
|
72 } else { |
|
73 KARATSUBA_MUL_CUTOFF = 100000; |
|
74 } |
|
75 |
|
76 t_start(); |
|
77 for (x = 0; x < TIMES; x++) { |
|
78 mp_mul(&a,&b,&c); |
|
79 } |
|
80 t1 = t_read(); |
|
81 mp_clear (&a); |
|
82 mp_clear (&b); |
|
83 mp_clear (&c); |
|
84 return t1; |
|
85 } |
|
86 |
|
87 ulong64 time_sqr(int size, int s) |
|
88 { |
|
89 unsigned long x; |
|
90 mp_int a, b; |
|
91 ulong64 t1; |
|
92 |
|
93 mp_init (&a); |
|
94 mp_init (&b); |
|
95 |
|
96 mp_rand (&a, size); |
|
97 |
|
98 if (s == 1) { |
|
99 KARATSUBA_SQR_CUTOFF = size; |
|
100 } else { |
|
101 KARATSUBA_SQR_CUTOFF = 100000; |
|
102 } |
|
103 |
|
104 t_start(); |
|
105 for (x = 0; x < TIMES; x++) { |
|
106 mp_sqr(&a,&b); |
|
107 } |
|
108 t1 = t_read(); |
|
109 mp_clear (&a); |
|
110 mp_clear (&b); |
|
111 return t1; |
|
112 } |
|
113 |
|
114 int |
|
115 main (void) |
|
116 { |
|
117 ulong64 t1, t2; |
|
118 int x, y; |
|
119 |
|
120 for (x = 8; ; x += 2) { |
|
121 t1 = time_mult(x, 0); |
|
122 t2 = time_mult(x, 1); |
|
123 printf("%d: %9llu %9llu, %9llu\n", x, t1, t2, t2 - t1); |
|
124 if (t2 < t1) break; |
|
125 } |
|
126 y = x; |
|
127 |
|
128 for (x = 8; ; x += 2) { |
|
129 t1 = time_sqr(x, 0); |
|
130 t2 = time_sqr(x, 1); |
|
131 printf("%d: %9llu %9llu, %9llu\n", x, t1, t2, t2 - t1); |
|
132 if (t2 < t1) break; |
|
133 } |
|
134 printf("KARATSUBA_MUL_CUTOFF = %d\n", y); |
|
135 printf("KARATSUBA_SQR_CUTOFF = %d\n", x); |
|
136 |
|
137 return 0; |
|
138 } |
386
|
139 |
|
140 /* $Source: /cvs/libtom/libtommath/etc/tune.c,v $ */ |
|
141 /* $Revision: 1.3 $ */ |
|
142 /* $Date: 2006/03/31 14:18:47 $ */ |