2
|
1 /* Tune the Karatsuba parameters |
|
2 * |
|
3 * Tom St Denis, [email protected] |
|
4 */ |
|
5 #include <tommath.h> |
|
6 #include <time.h> |
|
7 |
|
8 /* how many times todo each size mult. Depends on your computer. For slow computers |
|
9 * this can be low like 5 or 10. For fast [re: Athlon] should be 25 - 50 or so |
|
10 */ |
|
11 #define TIMES 50 |
|
12 |
|
13 |
|
14 #ifndef X86_TIMER |
|
15 |
|
16 /* generic ISO C timer */ |
|
17 ulong64 __T; |
|
18 void t_start(void) { __T = clock(); } |
|
19 ulong64 t_read(void) { return clock() - __T; } |
|
20 |
|
21 #else |
|
22 extern void t_start(void); |
|
23 extern ulong64 t_read(void); |
|
24 #endif |
|
25 |
|
26 ulong64 |
|
27 time_mult (int max) |
|
28 { |
|
29 int x, y; |
|
30 mp_int a, b, c; |
|
31 |
|
32 mp_init (&a); |
|
33 mp_init (&b); |
|
34 mp_init (&c); |
|
35 |
|
36 t_start(); |
|
37 for (x = 32; x <= max; x += 4) { |
|
38 mp_rand (&a, x); |
|
39 mp_rand (&b, x); |
|
40 for (y = 0; y < TIMES; y++) { |
|
41 mp_mul (&a, &b, &c); |
|
42 } |
|
43 } |
|
44 mp_clear (&a); |
|
45 mp_clear (&b); |
|
46 mp_clear (&c); |
|
47 return t_read(); |
|
48 } |
|
49 |
|
50 ulong64 |
|
51 time_sqr (int max) |
|
52 { |
|
53 int x, y; |
|
54 mp_int a, b; |
|
55 |
|
56 mp_init (&a); |
|
57 mp_init (&b); |
|
58 |
|
59 t_start(); |
|
60 for (x = 32; x <= max; x += 4) { |
|
61 mp_rand (&a, x); |
|
62 for (y = 0; y < TIMES; y++) { |
|
63 mp_sqr (&a, &b); |
|
64 } |
|
65 } |
|
66 mp_clear (&a); |
|
67 mp_clear (&b); |
|
68 return t_read(); |
|
69 } |
|
70 |
|
71 int |
|
72 main (void) |
|
73 { |
|
74 int best_kmult, best_tmult, best_ksquare, best_tsquare, counter; |
|
75 ulong64 best, ti; |
|
76 FILE *log; |
|
77 |
|
78 best_kmult = best_ksquare = best_tmult = best_tsquare = 0; |
|
79 /* tune multiplication first */ |
|
80 |
|
81 /* effectively turn TOOM off */ |
|
82 TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 100000; |
|
83 |
|
84 log = fopen ("mult.log", "w"); |
|
85 best = -1; |
|
86 counter = 16; |
|
87 for (KARATSUBA_MUL_CUTOFF = 8; KARATSUBA_MUL_CUTOFF <= 200; KARATSUBA_MUL_CUTOFF++) { |
|
88 ti = time_mult (300); |
|
89 printf ("%4d : %9llu \r", KARATSUBA_MUL_CUTOFF, ti); |
|
90 fprintf (log, "%d, %llu\n", KARATSUBA_MUL_CUTOFF, ti); |
|
91 fflush (stdout); |
|
92 if (ti < best) { |
|
93 printf ("New best: %llu, %d \r", ti, KARATSUBA_MUL_CUTOFF); |
|
94 best = ti; |
|
95 best_kmult = KARATSUBA_MUL_CUTOFF; |
|
96 counter = 16; |
|
97 } else if (--counter == 0) { |
|
98 printf("No better found in 16 trials.\n"); |
|
99 break; |
|
100 } |
|
101 } |
|
102 fclose (log); |
|
103 printf("Karatsuba Multiplier Cutoff (KARATSUBA_MUL_CUTOFF) == %d\n", best_kmult); |
|
104 |
|
105 /* tune squaring */ |
|
106 log = fopen ("sqr.log", "w"); |
|
107 best = -1; |
|
108 counter = 16; |
|
109 for (KARATSUBA_SQR_CUTOFF = 8; KARATSUBA_SQR_CUTOFF <= 200; KARATSUBA_SQR_CUTOFF++) { |
|
110 ti = time_sqr (300); |
|
111 printf ("%4d : %9llu \r", KARATSUBA_SQR_CUTOFF, ti); |
|
112 fprintf (log, "%d, %llu\n", KARATSUBA_SQR_CUTOFF, ti); |
|
113 fflush (stdout); |
|
114 if (ti < best) { |
|
115 printf ("New best: %llu, %d \r", ti, KARATSUBA_SQR_CUTOFF); |
|
116 best = ti; |
|
117 best_ksquare = KARATSUBA_SQR_CUTOFF; |
|
118 counter = 16; |
|
119 } else if (--counter == 0) { |
|
120 printf("No better found in 16 trials.\n"); |
|
121 break; |
|
122 } |
|
123 } |
|
124 fclose (log); |
|
125 printf("Karatsuba Squaring Cutoff (KARATSUBA_SQR_CUTOFF) == %d\n", best_ksquare); |
|
126 |
|
127 KARATSUBA_MUL_CUTOFF = best_kmult; |
|
128 KARATSUBA_SQR_CUTOFF = best_ksquare; |
|
129 |
|
130 /* tune TOOM mult */ |
|
131 counter = 16; |
|
132 log = fopen ("tmult.log", "w"); |
|
133 best = -1; |
|
134 for (TOOM_MUL_CUTOFF = best_kmult*5; TOOM_MUL_CUTOFF <= 800; TOOM_MUL_CUTOFF++) { |
|
135 ti = time_mult (1200); |
|
136 printf ("%4d : %9llu \r", TOOM_MUL_CUTOFF, ti); |
|
137 fprintf (log, "%d, %llu\n", TOOM_MUL_CUTOFF, ti); |
|
138 fflush (stdout); |
|
139 if (ti < best) { |
|
140 printf ("New best: %llu, %d \r", ti, TOOM_MUL_CUTOFF); |
|
141 best = ti; |
|
142 best_tmult = TOOM_MUL_CUTOFF; |
|
143 counter = 16; |
|
144 } else if (--counter == 0) { |
|
145 printf("No better found in 16 trials.\n"); |
|
146 break; |
|
147 } |
|
148 } |
|
149 fclose (log); |
|
150 printf("Toom-Cook Multiplier Cutoff (TOOM_MUL_CUTOFF) == %d\n", best_tmult); |
|
151 |
|
152 /* tune TOOM sqr */ |
|
153 log = fopen ("tsqr.log", "w"); |
|
154 best = -1; |
|
155 counter = 16; |
|
156 for (TOOM_SQR_CUTOFF = best_ksquare*3; TOOM_SQR_CUTOFF <= 800; TOOM_SQR_CUTOFF++) { |
|
157 ti = time_sqr (1200); |
|
158 printf ("%4d : %9llu \r", TOOM_SQR_CUTOFF, ti); |
|
159 fprintf (log, "%d, %llu\n", TOOM_SQR_CUTOFF, ti); |
|
160 fflush (stdout); |
|
161 if (ti < best) { |
|
162 printf ("New best: %llu, %d \r", ti, TOOM_SQR_CUTOFF); |
|
163 best = ti; |
|
164 best_tsquare = TOOM_SQR_CUTOFF; |
|
165 counter = 16; |
|
166 } else if (--counter == 0) { |
|
167 printf("No better found in 16 trials.\n"); |
|
168 break; |
|
169 } |
|
170 } |
|
171 fclose (log); |
|
172 printf("Toom-Cook Squaring Cutoff (TOOM_SQR_CUTOFF) == %d\n", best_tsquare); |
|
173 |
|
174 |
|
175 return 0; |
|
176 } |