Mercurial > dropbear
comparison tomsfastmath/src/mul/fp_mul_comba.c @ 643:a362b62d38b2 dropbear-tfm
Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a
with Makefile.in renamed
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Wed, 23 Nov 2011 18:10:20 +0700 |
parents | |
children | 8622ee48fab5 |
comparison
equal
deleted
inserted
replaced
642:33fd2f3499d2 | 643:a362b62d38b2 |
---|---|
1 /* TomsFastMath, a fast ISO C bignum library. | |
2 * | |
3 * This project is meant to fill in where LibTomMath | |
4 * falls short. That is speed ;-) | |
5 * | |
6 * This project is public domain and free for all purposes. | |
7 * | |
8 * Tom St Denis, [email protected] | |
9 */ | |
10 | |
11 /* About this file... | |
12 | |
13 */ | |
14 | |
15 #include <tfm.h> | |
16 | |
17 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) | |
18 #undef TFM_SSE2 | |
19 #define TFM_X86 | |
20 #endif | |
21 | |
22 /* these are the combas. Worship them. */ | |
23 #if defined(TFM_X86) | |
24 /* Generic x86 optimized code */ | |
25 | |
26 /* anything you need at the start */ | |
27 #define COMBA_START | |
28 | |
29 /* clear the chaining variables */ | |
30 #define COMBA_CLEAR \ | |
31 c0 = c1 = c2 = 0; | |
32 | |
33 /* forward the carry to the next digit */ | |
34 #define COMBA_FORWARD \ | |
35 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
36 | |
37 /* store the first sum */ | |
38 #define COMBA_STORE(x) \ | |
39 x = c0; | |
40 | |
41 /* store the second sum [carry] */ | |
42 #define COMBA_STORE2(x) \ | |
43 x = c1; | |
44 | |
45 /* anything you need at the end */ | |
46 #define COMBA_FINI | |
47 | |
48 /* this should multiply i and j */ | |
49 #define MULADD(i, j) \ | |
50 asm( \ | |
51 "movl %6,%%eax \n\t" \ | |
52 "mull %7 \n\t" \ | |
53 "addl %%eax,%0 \n\t" \ | |
54 "adcl %%edx,%1 \n\t" \ | |
55 "adcl $0,%2 \n\t" \ | |
56 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); | |
57 | |
58 #elif defined(TFM_X86_64) | |
59 /* x86-64 optimized */ | |
60 | |
61 /* anything you need at the start */ | |
62 #define COMBA_START | |
63 | |
64 /* clear the chaining variables */ | |
65 #define COMBA_CLEAR \ | |
66 c0 = c1 = c2 = 0; | |
67 | |
68 /* forward the carry to the next digit */ | |
69 #define COMBA_FORWARD \ | |
70 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
71 | |
72 /* store the first sum */ | |
73 #define COMBA_STORE(x) \ | |
74 x = c0; | |
75 | |
76 /* store the second sum [carry] */ | |
77 #define COMBA_STORE2(x) \ | |
78 x = c1; | |
79 | |
80 /* anything you need at the end */ | |
81 #define COMBA_FINI | |
82 | |
83 /* this should multiply i and j */ | |
84 #define MULADD(i, j) \ | |
85 asm ( \ | |
86 "movq %6,%%rax \n\t" \ | |
87 "mulq %7 \n\t" \ | |
88 "addq %%rax,%0 \n\t" \ | |
89 "adcq %%rdx,%1 \n\t" \ | |
90 "adcq $0,%2 \n\t" \ | |
91 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); | |
92 | |
93 #elif defined(TFM_SSE2) | |
94 /* use SSE2 optimizations */ | |
95 | |
96 /* anything you need at the start */ | |
97 #define COMBA_START | |
98 | |
99 /* clear the chaining variables */ | |
100 #define COMBA_CLEAR \ | |
101 c0 = c1 = c2 = 0; | |
102 | |
103 /* forward the carry to the next digit */ | |
104 #define COMBA_FORWARD \ | |
105 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
106 | |
107 /* store the first sum */ | |
108 #define COMBA_STORE(x) \ | |
109 x = c0; | |
110 | |
111 /* store the second sum [carry] */ | |
112 #define COMBA_STORE2(x) \ | |
113 x = c1; | |
114 | |
115 /* anything you need at the end */ | |
116 #define COMBA_FINI \ | |
117 asm("emms"); | |
118 | |
119 /* this should multiply i and j */ | |
120 #define MULADD(i, j) \ | |
121 asm( \ | |
122 "movd %6,%%mm0 \n\t" \ | |
123 "movd %7,%%mm1 \n\t" \ | |
124 "pmuludq %%mm1,%%mm0\n\t" \ | |
125 "movd %%mm0,%%eax \n\t" \ | |
126 "psrlq $32,%%mm0 \n\t" \ | |
127 "addl %%eax,%0 \n\t" \ | |
128 "movd %%mm0,%%eax \n\t" \ | |
129 "adcl %%eax,%1 \n\t" \ | |
130 "adcl $0,%2 \n\t" \ | |
131 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc"); | |
132 | |
133 #elif defined(TFM_ARM) | |
134 /* ARM code */ | |
135 | |
136 #define COMBA_START | |
137 | |
138 #define COMBA_CLEAR \ | |
139 c0 = c1 = c2 = 0; | |
140 | |
141 #define COMBA_FORWARD \ | |
142 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
143 | |
144 #define COMBA_STORE(x) \ | |
145 x = c0; | |
146 | |
147 #define COMBA_STORE2(x) \ | |
148 x = c1; | |
149 | |
150 #define COMBA_FINI | |
151 | |
152 #define MULADD(i, j) \ | |
153 asm( \ | |
154 " UMULL r0,r1,%6,%7 \n\t" \ | |
155 " ADDS %0,%0,r0 \n\t" \ | |
156 " ADCS %1,%1,r1 \n\t" \ | |
157 " ADC %2,%2,#0 \n\t" \ | |
158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | |
159 | |
160 #elif defined(TFM_PPC32) | |
161 /* For 32-bit PPC */ | |
162 | |
163 #define COMBA_START | |
164 | |
165 #define COMBA_CLEAR \ | |
166 c0 = c1 = c2 = 0; | |
167 | |
168 #define COMBA_FORWARD \ | |
169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
170 | |
171 #define COMBA_STORE(x) \ | |
172 x = c0; | |
173 | |
174 #define COMBA_STORE2(x) \ | |
175 x = c1; | |
176 | |
177 #define COMBA_FINI | |
178 | |
179 /* untested: will mulhwu change the flags? Docs say no */ | |
180 #define MULADD(i, j) \ | |
181 asm( \ | |
182 " mullw 16,%6,%7 \n\t" \ | |
183 " addc %0,%0,16 \n\t" \ | |
184 " mulhwu 16,%6,%7 \n\t" \ | |
185 " adde %1,%1,16 \n\t" \ | |
186 " addze %2,%2 \n\t" \ | |
187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); | |
188 | |
189 #elif defined(TFM_PPC64) | |
190 /* For 64-bit PPC */ | |
191 | |
192 #define COMBA_START | |
193 | |
194 #define COMBA_CLEAR \ | |
195 c0 = c1 = c2 = 0; | |
196 | |
197 #define COMBA_FORWARD \ | |
198 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
199 | |
200 #define COMBA_STORE(x) \ | |
201 x = c0; | |
202 | |
203 #define COMBA_STORE2(x) \ | |
204 x = c1; | |
205 | |
206 #define COMBA_FINI | |
207 | |
208 /* untested: will mulhdu change the flags? Docs say no */ | |
209 #define MULADD(i, j) \ | |
210 asm( \ | |
211 " mulld r16,%6,%7 \n\t" \ | |
212 " addc %0,%0,16 \n\t" \ | |
213 " mulhdu r16,%6,%7 \n\t" \ | |
214 " adde %1,%1,16 \n\t" \ | |
215 " addze %2,%2 \n\t" \ | |
216 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16"); | |
217 | |
218 #elif defined(TFM_AVR32) | |
219 | |
220 /* ISO C code */ | |
221 | |
222 #define COMBA_START | |
223 | |
224 #define COMBA_CLEAR \ | |
225 c0 = c1 = c2 = 0; | |
226 | |
227 #define COMBA_FORWARD \ | |
228 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
229 | |
230 #define COMBA_STORE(x) \ | |
231 x = c0; | |
232 | |
233 #define COMBA_STORE2(x) \ | |
234 x = c1; | |
235 | |
236 #define COMBA_FINI | |
237 | |
238 #define MULADD(i, j) \ | |
239 asm( \ | |
240 " mulu.d r2,%6,%7 \n\t"\ | |
241 " add %0,r2 \n\t"\ | |
242 " adc %1,%1,r3 \n\t"\ | |
243 " acr %2 \n\t"\ | |
244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); | |
245 | |
246 #elif defined(TFM_MIPS) | |
247 | |
248 #define COMBA_START | |
249 | |
250 #define COMBA_CLEAR \ | |
251 c0 = c1 = c2 = 0; | |
252 | |
253 #define COMBA_FORWARD \ | |
254 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
255 | |
256 #define COMBA_STORE(x) \ | |
257 x = c0; | |
258 | |
259 #define COMBA_STORE2(x) \ | |
260 x = c1; | |
261 | |
262 #define COMBA_FINI | |
263 | |
264 #define MULADD(i, j) \ | |
265 asm( \ | |
266 " multu %6,%7 \n\t" \ | |
267 " mflo $12 \n\t" \ | |
268 " mfhi $13 \n\t" \ | |
269 " addu %0,%0,$12 \n\t" \ | |
270 " sltu $12,%0,$12 \n\t" \ | |
271 " addu %1,%1,$13 \n\t" \ | |
272 " sltu $13,%1,$13 \n\t" \ | |
273 " addu %1,%1,$12 \n\t" \ | |
274 " sltu $12,%1,$12 \n\t" \ | |
275 " addu %2,%2,$13 \n\t" \ | |
276 " addu %2,%2,$12 \n\t" \ | |
277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13"); | |
278 | |
279 #else | |
280 /* ISO C code */ | |
281 | |
282 #define COMBA_START | |
283 | |
284 #define COMBA_CLEAR \ | |
285 c0 = c1 = c2 = 0; | |
286 | |
287 #define COMBA_FORWARD \ | |
288 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | |
289 | |
290 #define COMBA_STORE(x) \ | |
291 x = c0; | |
292 | |
293 #define COMBA_STORE2(x) \ | |
294 x = c1; | |
295 | |
296 #define COMBA_FINI | |
297 | |
298 #define MULADD(i, j) \ | |
299 do { fp_word t; \ | |
300 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ | |
301 t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ | |
302 } while (0); | |
303 | |
304 #endif | |
305 | |
306 #ifndef TFM_DEFINES | |
307 | |
308 /* generic PxQ multiplier */ | |
309 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) | |
310 { | |
311 int ix, iy, iz, tx, ty, pa; | |
312 fp_digit c0, c1, c2, *tmpx, *tmpy; | |
313 fp_int tmp, *dst; | |
314 | |
315 COMBA_START; | |
316 COMBA_CLEAR; | |
317 | |
318 /* get size of output and trim */ | |
319 pa = A->used + B->used; | |
320 if (pa >= FP_SIZE) { | |
321 pa = FP_SIZE-1; | |
322 } | |
323 | |
324 if (A == C || B == C) { | |
325 fp_zero(&tmp); | |
326 dst = &tmp; | |
327 } else { | |
328 fp_zero(C); | |
329 dst = C; | |
330 } | |
331 | |
332 for (ix = 0; ix < pa; ix++) { | |
333 /* get offsets into the two bignums */ | |
334 ty = MIN(ix, B->used-1); | |
335 tx = ix - ty; | |
336 | |
337 /* setup temp aliases */ | |
338 tmpx = A->dp + tx; | |
339 tmpy = B->dp + ty; | |
340 | |
341 /* this is the number of times the loop will iterrate, essentially its | |
342 while (tx++ < a->used && ty-- >= 0) { ... } | |
343 */ | |
344 iy = MIN(A->used-tx, ty+1); | |
345 | |
346 /* execute loop */ | |
347 COMBA_FORWARD; | |
348 for (iz = 0; iz < iy; ++iz) { | |
349 MULADD(*tmpx++, *tmpy--); | |
350 } | |
351 | |
352 /* store term */ | |
353 COMBA_STORE(dst->dp[ix]); | |
354 } | |
355 COMBA_FINI; | |
356 | |
357 dst->used = pa; | |
358 dst->sign = A->sign ^ B->sign; | |
359 fp_clamp(dst); | |
360 fp_copy(dst, C); | |
361 } | |
362 | |
363 #endif | |
364 | |
365 /* $Source$ */ | |
366 /* $Revision$ */ | |
367 /* $Date$ */ | |
368 |