Mercurial > dropbear
comparison tomsfastmath/src/sqr/fp_sqr_comba.c @ 646:f10335e5e42f dropbear-tfm
- More asm constraint fixes. Now seems to build OK on 32-bit OS X.
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Wed, 30 Nov 2011 23:03:47 +0800 |
parents | a362b62d38b2 |
children | 939cd3e22c87 |
comparison
equal
deleted
inserted
replaced
645:8622ee48fab5 | 646:f10335e5e42f |
---|---|
33 do { c0 = c1; c1 = c2; c2 = 0; } while (0); | 33 do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
34 | 34 |
35 #define COMBA_FINI | 35 #define COMBA_FINI |
36 | 36 |
37 #define SQRADD(i, j) \ | 37 #define SQRADD(i, j) \ |
38 do { fp_digit ti = (i); \ | |
38 asm( \ | 39 asm( \ |
39 "movl %6,%%eax \n\t" \ | 40 "movl %6,%%eax \n\t" \ |
40 "mull %%eax \n\t" \ | 41 "mull %%eax \n\t" \ |
41 "addl %%eax,%0 \n\t" \ | 42 "addl %%eax,%0 \n\t" \ |
42 "adcl %%edx,%1 \n\t" \ | 43 "adcl %%edx,%1 \n\t" \ |
43 "adcl $0,%2 \n\t" \ | 44 "adcl $0,%2 \n\t" \ |
44 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); | 45 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(ti) :"%eax","%edx","%cc"); } while (0); |
45 | 46 |
46 #define SQRADD2(i, j) \ | 47 #define SQRADD2(i, j) \ |
48 do { fp_digit ti = (i); \ | |
49 fp_digit tj = (j); \ | |
47 asm( \ | 50 asm( \ |
48 "movl %6,%%eax \n\t" \ | 51 "movl %6,%%eax \n\t" \ |
49 "mull %7 \n\t" \ | 52 "mull %7 \n\t" \ |
50 "addl %%eax,%0 \n\t" \ | 53 "addl %%eax,%0 \n\t" \ |
51 "adcl %%edx,%1 \n\t" \ | 54 "adcl %%edx,%1 \n\t" \ |
52 "adcl $0,%2 \n\t" \ | 55 "adcl $0,%2 \n\t" \ |
53 "addl %%eax,%0 \n\t" \ | 56 "addl %%eax,%0 \n\t" \ |
54 "adcl %%edx,%1 \n\t" \ | 57 "adcl %%edx,%1 \n\t" \ |
55 "adcl $0,%2 \n\t" \ | 58 "adcl $0,%2 \n\t" \ |
56 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); | 59 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(ti), "m"(tj) :"%eax","%edx","%cc"); } while (0); |
57 | 60 |
58 #define SQRADDSC(i, j) \ | 61 #define SQRADDSC(i, j) \ |
62 do { \ | |
63 asm( \ | |
64 "movl %2,%%eax \n\t" \ | |
65 "mull %3 \n\t" \ | |
66 "movl %%eax,%0 \n\t" \ | |
67 "movl %%edx,%1 \n\t" \ | |
68 :"=r"(sc0), "=r"(sc1): "g"(i), "g"(j) :"%eax","%edx","%cc"); \ | |
69 sc2 = 0; } while (0); | |
70 | |
71 #define SQRADDAC(i, j) \ | |
72 do { fp_digit ti = (i); \ | |
73 fp_digit tj = (j); \ | |
59 asm( \ | 74 asm( \ |
60 "movl %6,%%eax \n\t" \ | 75 "movl %6,%%eax \n\t" \ |
61 "mull %7 \n\t" \ | 76 "mull %7 \n\t" \ |
62 "movl %%eax,%0 \n\t" \ | |
63 "movl %%edx,%1 \n\t" \ | |
64 "xorl %2,%2 \n\t" \ | |
65 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | |
66 | |
67 #define SQRADDAC(i, j) \ | |
68 asm( \ | |
69 "movl %6,%%eax \n\t" \ | |
70 "mull %7 \n\t" \ | |
71 "addl %%eax,%0 \n\t" \ | 77 "addl %%eax,%0 \n\t" \ |
72 "adcl %%edx,%1 \n\t" \ | 78 "adcl %%edx,%1 \n\t" \ |
73 "adcl $0,%2 \n\t" \ | 79 "adcl $0,%2 \n\t" \ |
74 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | 80 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(ti), "m"(tj) :"%eax","%edx","%cc"); \ |
81 } while (0); | |
75 | 82 |
76 #define SQRADDDB \ | 83 #define SQRADDDB \ |
84 do { fp_digit tsc0 = (sc0); \ | |
85 fp_digit tsc1 = (sc1); \ | |
86 fp_digit tsc2 = (sc2); \ | |
77 asm( \ | 87 asm( \ |
78 "addl %6,%0 \n\t" \ | 88 "addl %6,%0 \n\t" \ |
79 "adcl %7,%1 \n\t" \ | 89 "adcl %7,%1 \n\t" \ |
80 "adcl %8,%2 \n\t" \ | 90 "adcl %8,%2 \n\t" \ |
81 "addl %6,%0 \n\t" \ | 91 "addl %6,%0 \n\t" \ |
82 "adcl %7,%1 \n\t" \ | 92 "adcl %7,%1 \n\t" \ |
83 "adcl %8,%2 \n\t" \ | 93 "adcl %8,%2 \n\t" \ |
84 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); | 94 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "m"(tsc0), "m"(tsc1), "m"(tsc2) : "%cc"); } while (0); |
85 | 95 |
86 #elif defined(TFM_X86_64) | 96 #elif defined(TFM_X86_64) |
87 /* x86-64 optimized */ | 97 /* x86-64 optimized */ |
88 | 98 |
89 #define COMBA_START | 99 #define COMBA_START |