comparison tomsfastmath/src/sqr/fp_sqr_comba.c @ 643:a362b62d38b2 dropbear-tfm

Add tomsfastmath from git rev bfa4582842bc3bab42e4be4aed5703437049502a with Makefile.in renamed
author Matt Johnston <matt@ucc.asn.au>
date Wed, 23 Nov 2011 18:10:20 +0700
parents
children f10335e5e42f
comparison
equal deleted inserted replaced
642:33fd2f3499d2 643:a362b62d38b2
1 /*
2 *
3 * This project is meant to fill in where LibTomMath
4 * falls short. That is speed ;-)
5 *
6 * This project is public domain and free for all purposes.
7 *
8 * Tom St Denis, tomstdenis@gmail.com
9 */
10 #include <tfm.h>
11
/* When both TFM_PRESCOTT and TFM_SSE2 are requested, drop the SSE2 request
 * and select the plain x86-32 macro set instead. */
#ifdef TFM_PRESCOTT
#ifdef TFM_SSE2
   #undef TFM_SSE2
   #define TFM_X86
#endif
#endif
16
17 #if defined(TFM_X86)
18
19 /* x86-32 optimized */
20
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
36
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * mull leaves the 64-bit square in edx:eax, which is then added in with
 * carry propagation.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11, and
 * the flags clobber uses the documented "cc" spelling. */
#define SQRADD(i, j) \
__asm__( \
   "movl  %6,%%eax   \n\t" \
   "mull  %%eax      \n\t" \
   "addl  %%eax,%0   \n\t" \
   "adcl  %%edx,%1   \n\t" \
   "adcl  $0,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
45
/* c2:c1:c0 += 2 * (i * j): the product in edx:eax is added into the
 * accumulator twice, each time with carry propagation.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   "movl  %6,%%eax   \n\t" \
   "mull  %7         \n\t" \
   "addl  %%eax,%0   \n\t" \
   "adcl  %%edx,%1   \n\t" \
   "adcl  $0,%2      \n\t" \
   "addl  %%eax,%0   \n\t" \
   "adcl  %%edx,%1   \n\t" \
   "adcl  $0,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
57
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. Both sources are consumed before any output is written.
 * j uses "rm" because mull cannot take an immediate, which "g" allowed.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   "movl  %3,%%eax   \n\t" \
   "mull  %4         \n\t" \
   "movl  %%eax,%0   \n\t" \
   "movl  %%edx,%1   \n\t" \
   "xorl  %2,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "rm"(j) :"%eax","%edx","cc");
66
/* sc2:sc1:sc0 += i * j, with carry propagation through all three digits.
 * j uses "rm" because mull cannot take an immediate, which "g" allowed.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   "movl  %6,%%eax   \n\t" \
   "mull  %7         \n\t" \
   "addl  %%eax,%0   \n\t" \
   "adcl  %%edx,%1   \n\t" \
   "adcl  $0,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%eax","%edx","cc");
75
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   "addl  %6,%0      \n\t" \
   "adcl  %7,%1      \n\t" \
   "adcl  %8,%2      \n\t" \
   "addl  %6,%0      \n\t" \
   "adcl  %7,%1      \n\t" \
   "adcl  %8,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
85
86 #elif defined(TFM_X86_64)
87 /* x86-64 optimized */
88
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
104
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * mulq leaves the 128-bit square in rdx:rax, which is then added in with
 * carry propagation.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11, and
 * the flags clobber uses the documented "cc" spelling. */
#define SQRADD(i, j) \
__asm__( \
   "movq  %6,%%rax   \n\t" \
   "mulq  %%rax      \n\t" \
   "addq  %%rax,%0   \n\t" \
   "adcq  %%rdx,%1   \n\t" \
   "adcq  $0,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
113
/* c2:c1:c0 += 2 * (i * j): the product in rdx:rax is added into the
 * accumulator twice, each time with carry propagation.
 * j uses "rm" because mulq cannot take an immediate, which "g" allowed.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   "movq  %6,%%rax   \n\t" \
   "mulq  %7         \n\t" \
   "addq  %%rax,%0   \n\t" \
   "adcq  %%rdx,%1   \n\t" \
   "adcq  $0,%2      \n\t" \
   "addq  %%rax,%0   \n\t" \
   "adcq  %%rdx,%1   \n\t" \
   "adcq  $0,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "rm"(j) :"%rax","%rdx","cc");
125
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. Both sources are consumed before any output is written.
 * j uses "rm" because mulq cannot take an immediate, which "g" allowed.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   "movq  %3,%%rax   \n\t" \
   "mulq  %4         \n\t" \
   "movq  %%rax,%0   \n\t" \
   "movq  %%rdx,%1   \n\t" \
   "xorq  %2,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "rm"(j) :"%rax","%rdx","cc");
134
/* sc2:sc1:sc0 += i * j, with carry propagation through all three digits.
 * j uses "rm" because mulq cannot take an immediate, which "g" allowed.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   "movq  %6,%%rax   \n\t" \
   "mulq  %7         \n\t" \
   "addq  %%rax,%0   \n\t" \
   "adcq  %%rdx,%1   \n\t" \
   "adcq  $0,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%rax","%rdx","cc");
143
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   "addq  %6,%0      \n\t" \
   "adcq  %7,%1      \n\t" \
   "adcq  %8,%2      \n\t" \
   "addq  %6,%0      \n\t" \
   "adcq  %7,%1      \n\t" \
   "adcq  %8,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
153
154 #elif defined(TFM_SSE2)
155
156 /* SSE2 Optimized */
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* The multiply macros of this variant use the MMX registers mm0/mm1, so
 * finish with emms. __asm__ is used because plain asm is not a keyword
 * under -std=c99/c11. */
#define COMBA_FINI   __asm__("emms");
173
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * pmuludq forms the 64-bit square in mm0; its low and high halves are moved
 * through eax and added in. The movd between addl and adcl leaves the
 * carry flag untouched.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD(i, j) \
__asm__( \
   "movd    %6,%%mm0     \n\t" \
   "pmuludq %%mm0,%%mm0  \n\t" \
   "movd    %%mm0,%%eax  \n\t" \
   "psrlq   $32,%%mm0    \n\t" \
   "addl    %%eax,%0     \n\t" \
   "movd    %%mm0,%%eax  \n\t" \
   "adcl    %%eax,%1     \n\t" \
   "adcl    $0,%2        \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
185
/* c2:c1:c0 += 2 * (i * j): pmuludq forms the product in mm0, its halves
 * are copied to eax/edx and added into the accumulator twice.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   "movd    %6,%%mm0     \n\t" \
   "movd    %7,%%mm1     \n\t" \
   "pmuludq %%mm1,%%mm0  \n\t" \
   "movd    %%mm0,%%eax  \n\t" \
   "psrlq   $32,%%mm0    \n\t" \
   "movd    %%mm0,%%edx  \n\t" \
   "addl    %%eax,%0     \n\t" \
   "adcl    %%edx,%1     \n\t" \
   "adcl    $0,%2        \n\t" \
   "addl    %%eax,%0     \n\t" \
   "adcl    %%edx,%1     \n\t" \
   "adcl    $0,%2        \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
201
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. Both memory sources are loaded before any output is written.
 * The original had no clobber list although xorl writes the flags, so "cc"
 * is declared here. __asm__ is valid under -std=c99/c11. */
#define SQRADDSC(i, j) \
__asm__( \
   "movd    %3,%%mm0     \n\t" \
   "movd    %4,%%mm1     \n\t" \
   "pmuludq %%mm1,%%mm0  \n\t" \
   "movd    %%mm0,%0     \n\t" \
   "psrlq   $32,%%mm0    \n\t" \
   "movd    %%mm0,%1     \n\t" \
   "xorl    %2,%2        \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j) : "cc");
212
/* sc2:sc1:sc0 += i * j: pmuludq forms the product in mm0, its halves are
 * copied to eax/edx and added in with carry propagation.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   "movd    %6,%%mm0     \n\t" \
   "movd    %7,%%mm1     \n\t" \
   "pmuludq %%mm1,%%mm0  \n\t" \
   "movd    %%mm0,%%eax  \n\t" \
   "psrlq   $32,%%mm0    \n\t" \
   "movd    %%mm0,%%edx  \n\t" \
   "addl    %%eax,%0     \n\t" \
   "adcl    %%edx,%1     \n\t" \
   "adcl    $0,%2        \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
225
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   "addl  %6,%0      \n\t" \
   "adcl  %7,%1      \n\t" \
   "adcl  %8,%2      \n\t" \
   "addl  %6,%0      \n\t" \
   "adcl  %7,%1      \n\t" \
   "adcl  %8,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
235
236 #elif defined(TFM_ARM)
237
238 /* ARM code */
239
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
255
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * UMULL places the 64-bit square in r1:r0 (both clobbered), which is then
 * added in with carry propagation.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD(i, j) \
__asm__( \
   " UMULL  r0,r1,%6,%6   \n\t" \
   " ADDS   %0,%0,r0      \n\t" \
   " ADCS   %1,%1,r1      \n\t" \
   " ADC    %2,%2,#0      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
264
/* c2:c1:c0 += 2 * (i * j): the product in r1:r0 is added into the
 * accumulator twice, because these cross terms occur twice in a square.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   " UMULL  r0,r1,%6,%7   \n\t" \
   " ADDS   %0,%0,r0      \n\t" \
   " ADCS   %1,%1,r1      \n\t" \
   " ADC    %2,%2,#0      \n\t" \
   " ADDS   %0,%0,r0      \n\t" \
   " ADCS   %1,%1,r1      \n\t" \
   " ADC    %2,%2,#0      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
276
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. sc0/sc1 are earlyclobber so the UMULL destinations never
 * share a register with i or j.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   " UMULL  %0,%1,%3,%4   \n\t" \
   " SUB    %2,%2,%2      \n\t" \
   :"=&r"(sc0), "=&r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
282
/* sc2:sc1:sc0 += i * j, with carry propagation through all three digits.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   " UMULL  r0,r1,%6,%7   \n\t" \
   " ADDS   %0,%0,r0      \n\t" \
   " ADCS   %1,%1,r1      \n\t" \
   " ADC    %2,%2,#0      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
290
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   " ADDS   %0,%0,%3      \n\t" \
   " ADCS   %1,%1,%4      \n\t" \
   " ADC    %2,%2,%5      \n\t" \
   " ADDS   %0,%0,%3      \n\t" \
   " ADCS   %1,%1,%4      \n\t" \
   " ADC    %2,%2,%5      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
300
301 #elif defined(TFM_PPC32)
302
303 /* PPC32 */
304
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
320
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * Register 16 is used as scratch (and clobbered) for the low and then the
 * high half of the square.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD(i, j) \
__asm__( \
   " mullw  16,%6,%6   \n\t" \
   " addc   %0,%0,16   \n\t" \
   " mulhwu 16,%6,%6   \n\t" \
   " adde   %1,%1,16   \n\t" \
   " addze  %2,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
330
/* c2:c1:c0 += 2 * (i * j): the product halves are held in registers 16/17
 * (both clobbered) and added into the accumulator twice, because these
 * cross terms occur twice in a square.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   " mullw  16,%6,%7   \n\t" \
   " mulhwu 17,%6,%7   \n\t" \
   " addc   %0,%0,16   \n\t" \
   " adde   %1,%1,17   \n\t" \
   " addze  %2,%2      \n\t" \
   " addc   %0,%0,16   \n\t" \
   " adde   %1,%1,17   \n\t" \
   " addze  %2,%2      \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
343
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. sc0 is earlyclobber because it is written by mullw while i
 * and j are still needed by mulhwu.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   " mullw  %0,%3,%4   \n\t" \
   " mulhwu %1,%3,%4   \n\t" \
   " xor    %2,%2,%2   \n\t" \
   :"=&r"(sc0), "=r"(sc1), "=r"(sc2):"r"(i),"r"(j) : "cc");
350
/* sc2:sc1:sc0 += i * j, with carry propagation; register 16 is scratch.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   " mullw  16,%6,%7   \n\t" \
   " addc   %0,%0,16   \n\t" \
   " mulhwu 16,%6,%7   \n\t" \
   " adde   %1,%1,16   \n\t" \
   " addze  %2,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
359
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   " addc   %0,%0,%3   \n\t" \
   " adde   %1,%1,%4   \n\t" \
   " adde   %2,%2,%5   \n\t" \
   " addc   %0,%0,%3   \n\t" \
   " adde   %1,%1,%4   \n\t" \
   " adde   %2,%2,%5   \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
369
370 #elif defined(TFM_PPC64)
371 /* PPC64 */
372
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
388
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * r16 is used as scratch (and clobbered) for the low and then the high
 * half of the square.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD(i, j) \
__asm__( \
   " mulld  r16,%6,%6   \n\t" \
   " addc   %0,%0,r16   \n\t" \
   " mulhdu r16,%6,%6   \n\t" \
   " adde   %1,%1,r16   \n\t" \
   " addze  %2,%2       \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");
398
/* c2:c1:c0 += 2 * (i * j): the product halves are held in r16/r17 (both
 * clobbered) and added into the accumulator twice, because these cross
 * terms occur twice in a square.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADD2(i, j) \
__asm__( \
   " mulld  r16,%6,%7   \n\t" \
   " mulhdu r17,%6,%7   \n\t" \
   " addc   %0,%0,r16   \n\t" \
   " adde   %1,%1,r17   \n\t" \
   " addze  %2,%2       \n\t" \
   " addc   %0,%0,r16   \n\t" \
   " adde   %1,%1,r17   \n\t" \
   " addze  %2,%2       \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");
411
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. sc0 is earlyclobber because it is written by mulld while i
 * and j are still needed by mulhdu.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   " mulld  %0,%3,%4   \n\t" \
   " mulhdu %1,%3,%4   \n\t" \
   " xor    %2,%2,%2   \n\t" \
   :"=&r"(sc0), "=r"(sc1), "=r"(sc2):"r"(i),"r"(j) : "cc");
418
/* sc2:sc1:sc0 += i * j, with carry propagation; r16 is scratch.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDAC(i, j) \
__asm__( \
   " mulld  r16,%6,%7   \n\t" \
   " addc   %0,%0,r16   \n\t" \
   " mulhdu r16,%6,%7   \n\t" \
   " adde   %1,%1,r16   \n\t" \
   " addze  %2,%2       \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");
427
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   " addc   %0,%0,%3   \n\t" \
   " adde   %1,%1,%4   \n\t" \
   " adde   %2,%2,%5   \n\t" \
   " addc   %0,%0,%3   \n\t" \
   " adde   %1,%1,%4   \n\t" \
   " adde   %2,%2,%5   \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
437
438
439 #elif defined(TFM_AVR32)
440
441 /* AVR32 */
442
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
458
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * mulu.d places the double-width square in r3:r2 (both clobbered).
 * "cc" is now declared because add/adc/acr update the condition flags,
 * as the SQRADDDB variant of this target already does.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADD(i, j) \
__asm__( \
   " mulu.d r2,%6,%6   \n\t" \
   " add    %0,%0,r2   \n\t" \
   " adc    %1,%1,r3   \n\t" \
   " acr    %2         \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3","cc");
467
/* c2:c1:c0 += 2 * (i * j): the product in r3:r2 is added into the
 * accumulator twice, because these cross terms occur twice in a square.
 * Fixes the stray trailing comma after the operand of both acr
 * instructions ("acr %2,"), which is not valid assembler syntax; the
 * sibling SQRADD/SQRADDAC macros already use "acr %2".
 * Also declares "cc" (add/adc/acr update the flags) and uses __asm__. */
#define SQRADD2(i, j) \
__asm__( \
   " mulu.d r2,%6,%7   \n\t" \
   " add    %0,%0,r2   \n\t" \
   " adc    %1,%1,r3   \n\t" \
   " acr    %2         \n\t" \
   " add    %0,%0,r2   \n\t" \
   " adc    %1,%1,r3   \n\t" \
   " acr    %2         \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3", "cc");
479
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. i and j are consumed by mulu.d before any output is written.
 * "cc" is declared conservatively for the eor; __asm__ is used because
 * plain asm is unavailable under -std=c99/c11. */
#define SQRADDSC(i, j) \
__asm__( \
   " mulu.d r2,%3,%4   \n\t" \
   " mov    %0,r2      \n\t" \
   " mov    %1,r3      \n\t" \
   " eor    %2,%2      \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"r"(i),"r"(j) : "r2", "r3", "cc");
487
/* sc2:sc1:sc0 += i * j, with carry propagation; r2/r3 hold the product.
 * "cc" is now declared because add/adc/acr update the condition flags.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADDAC(i, j) \
__asm__( \
   " mulu.d r2,%6,%7   \n\t" \
   " add    %0,%0,r2   \n\t" \
   " adc    %1,%1,r3   \n\t" \
   " acr    %2         \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3", "cc");
495
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice, each pass propagating carries.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDDB \
__asm__( \
   " add    %0,%0,%3   \n\t" \
   " adc    %1,%1,%4   \n\t" \
   " adc    %2,%2,%5   \n\t" \
   " add    %0,%0,%3   \n\t" \
   " adc    %1,%1,%4   \n\t" \
   " adc    %2,%2,%5   \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
505
506 #elif defined(TFM_MIPS)
507
508 /* MIPS */
509
/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed on this target */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed on this target */
#define COMBA_FINI
525
/* c2:c1:c0 += i * i. The j argument is not referenced by this variant.
 * There is no carry flag here: each sltu recomputes the carry as
 * "sum < addend" and that bit is added into the next digit. $12/$13 are
 * scratch.
 * multu overwrites HI/LO, so both are now listed as clobbers.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADD(i, j) \
__asm__( \
   " multu  %6,%6        \n\t" \
   " mflo   $12          \n\t" \
   " mfhi   $13          \n\t" \
   " addu   %0,%0,$12    \n\t" \
   " sltu   $12,%0,$12   \n\t" \
   " addu   %1,%1,$13    \n\t" \
   " sltu   $13,%1,$13   \n\t" \
   " addu   %1,%1,$12    \n\t" \
   " sltu   $12,%1,$12   \n\t" \
   " addu   %2,%2,$13    \n\t" \
   " addu   %2,%2,$12    \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13","hi","lo");
541
/* c2:c1:c0 += 2 * (i * j): the product is kept in $13:$12 and added into
 * the accumulator twice, because these cross terms occur twice in a
 * square. $14/$15 carry the sltu-computed carry bits between digits.
 * multu overwrites HI/LO, so both are now listed as clobbers.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADD2(i, j) \
__asm__( \
   " multu  %6,%7        \n\t" \
   " mflo   $12          \n\t" \
   " mfhi   $13          \n\t" \
   \
   " addu   %0,%0,$12    \n\t" \
   " sltu   $14,%0,$12   \n\t" \
   " addu   %1,%1,$13    \n\t" \
   " sltu   $15,%1,$13   \n\t" \
   " addu   %1,%1,$14    \n\t" \
   " sltu   $14,%1,$14   \n\t" \
   " addu   %2,%2,$15    \n\t" \
   " addu   %2,%2,$14    \n\t" \
   \
   " addu   %0,%0,$12    \n\t" \
   " sltu   $14,%0,$12   \n\t" \
   " addu   %1,%1,$13    \n\t" \
   " sltu   $15,%1,$13   \n\t" \
   " addu   %1,%1,$14    \n\t" \
   " sltu   $14,%1,$14   \n\t" \
   " addu   %2,%2,$15    \n\t" \
   " addu   %2,%2,$14    \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15", "hi", "lo");
567
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator).
 * sc0/sc1/sc2 are pure outputs: the asm overwrites all three, so they are
 * no longer tied as inputs, which forced a read of possibly uninitialised
 * variables. i and j are consumed by multu before any output is written.
 * multu overwrites HI/LO, so both are now listed as clobbers.
 * Uses __asm__ (valid under -std=c99/c11) and the documented "cc" clobber. */
#define SQRADDSC(i, j) \
__asm__( \
   " multu  %3,%4        \n\t" \
   " mflo   %0           \n\t" \
   " mfhi   %1           \n\t" \
   " xor    %2,%2,%2     \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"r"(i),"r"(j) : "cc", "hi", "lo");
575
/* sc2:sc1:sc0 += i * j. Carries are recomputed with sltu as
 * "sum < addend"; $12/$13 are scratch.
 * multu overwrites HI/LO, so both are now listed as clobbers. The "$14"
 * clobber was dropped because the register never appears in the code.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADDAC(i, j) \
__asm__( \
   " multu  %6,%7        \n\t" \
   " mflo   $12          \n\t" \
   " mfhi   $13          \n\t" \
   " addu   %0,%0,$12    \n\t" \
   " sltu   $12,%0,$12   \n\t" \
   " addu   %1,%1,$13    \n\t" \
   " sltu   $13,%1,$13   \n\t" \
   " addu   %1,%1,$12    \n\t" \
   " sltu   $12,%1,$12   \n\t" \
   " addu   %2,%2,$13    \n\t" \
   " addu   %2,%2,$12    \n\t" \
   :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "hi", "lo");
590
/* c2:c1:c0 += 2 * sc2:sc1:sc0: the secondary accumulator is added into the
 * main one twice. $10/$11 hold the sltu-computed carries out of the low
 * and middle digits.
 * __asm__ is used because plain asm is unavailable under -std=c99/c11. */
#define SQRADDDB \
__asm__( \
   " addu   %0,%0,%3     \n\t" \
   " sltu   $10,%0,%3    \n\t" \
   " addu   %1,%1,$10    \n\t" \
   " sltu   $10,%1,$10   \n\t" \
   " addu   %1,%1,%4     \n\t" \
   " sltu   $11,%1,%4    \n\t" \
   " addu   %2,%2,$10    \n\t" \
   " addu   %2,%2,$11    \n\t" \
   " addu   %2,%2,%5     \n\t" \
   \
   " addu   %0,%0,%3     \n\t" \
   " sltu   $10,%0,%3    \n\t" \
   " addu   %1,%1,$10    \n\t" \
   " sltu   $10,%1,$10   \n\t" \
   " addu   %1,%1,%4     \n\t" \
   " sltu   $11,%1,%4    \n\t" \
   " addu   %2,%2,$10    \n\t" \
   " addu   %2,%2,$11    \n\t" \
   " addu   %2,%2,%5     \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
613
614 #else
615
/* Set only on this fallback branch, i.e. when none of the assembler
 * variants above was selected. */
#define TFM_ISO

/* ISO C portable code */

/* Accumulator helpers. c0/c1/c2 are the caller's three running column
 * digits (c0 lowest, c2 highest). */

/* no set-up needed for the portable variant */
#define COMBA_START

/* zero all three accumulator digits */
#define CLEAR_CARRY   c0 = c1 = c2 = 0;

/* copy the lowest accumulator digit into x */
#define COMBA_STORE(x)   x = c0;

/* copy the middle accumulator digit into x */
#define COMBA_STORE2(x)   x = c1;

/* shift the accumulator down by one digit and clear the top one */
#define CARRY_FORWARD   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* no tear-down needed for the portable variant */
#define COMBA_FINI
635
/* c2:c1:c0 += i * j, computed in the double-width type fp_word. The low
 * digit of each partial sum is stored and the part above DIGIT_BIT is
 * carried into the next digit.
 * The arguments are parenthesised before the cast so that an expression
 * argument (for example a conditional) is widened as a whole, not just its
 * first operand. */
#define SQRADD(i, j) \
   do { fp_word t; \
   t = c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = t; \
   t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
   } while (0);
642
643
/* c2:c1:c0 += 2 * (i * j): the double-width product t is added into the
 * accumulator twice, because these cross terms occur twice in a square.
 * tt is now declared inside the macro. The original used tt without
 * declaring it, so it only compiled where the caller happened to provide
 * an fp_word named tt.
 * The arguments are parenthesised before the cast so that expression
 * arguments are widened as a whole. */
#define SQRADD2(i, j) \
   do { fp_word t, tt; \
   t = ((fp_word)(i)) * ((fp_word)(j)); \
   tt = (fp_word)c0 + t; c0 = tt; \
   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
   tt = (fp_word)c0 + t; c0 = tt; \
   tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \
   } while (0);
653
/* sc2:sc1:sc0 = i * j  (starts a fresh secondary accumulator): sc0 takes
 * the low digit of the double-width product, sc1 the part above DIGIT_BIT,
 * and sc2 is cleared.
 * The arguments are parenthesised before the cast so that expression
 * arguments are widened as a whole. */
#define SQRADDSC(i, j) \
   do { fp_word t; \
   t = ((fp_word)(i)) * ((fp_word)(j)); \
   sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
   } while (0);
659
/* sc2:sc1:sc0 += i * j, computed in the double-width type fp_word with the
 * part above DIGIT_BIT carried into the next digit.
 * The arguments are parenthesised before the cast so that expression
 * arguments are widened as a whole. */
#define SQRADDAC(i, j) \
   do { fp_word t; \
   t = sc0 + ((fp_word)(i)) * ((fp_word)(j)); sc0 = t; \
   t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \
   } while (0);
665
/* c2:c1:c0 += 2 * sc2:sc1:sc0. Each digit of the secondary accumulator is
 * added twice in the double-width type fp_word, and the part above
 * DIGIT_BIT is carried into the next digit. */
#define SQRADDDB \
   do { \
      fp_word t; \
      t = ((fp_word)sc0) + ((fp_word)sc0) + c0; \
      c0 = t; \
      t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
      c1 = t; \
      c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \
   } while (0);
672
673 #endif
674
675 /* $Source$ */
676 /* $Revision$ */
677 /* $Date$ */