Mercurial > dropbear
comparison bn_fast_s_mp_mul_digs.c @ 142:d29b64170cf0 libtommath-orig
import of libtommath 0.32
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Sun, 19 Dec 2004 11:33:56 +0000 |
parents | 86e0b50a9b58 |
children | d8254fc979e9 |
comparison
equal
deleted
inserted
replaced
19:e1037a1e12e7 | 142:d29b64170cf0 |
---|---|
1 #include <tommath.h> | |
2 #ifdef BN_FAST_S_MP_MUL_DIGS_C | |
1 /* LibTomMath, multiple-precision integer library -- Tom St Denis | 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
2 * | 4 * |
3 * LibTomMath is a library that provides multiple-precision | 5 * LibTomMath is a library that provides multiple-precision |
4 * integer arithmetic as well as number theoretic functionality. | 6 * integer arithmetic as well as number theoretic functionality. |
5 * | 7 * |
10 * The library is free for all purposes without any express | 12 * The library is free for all purposes without any express |
11 * guarantee it works. | 13 * guarantee it works. |
12 * | 14 * |
13 * Tom St Denis, [email protected], http://math.libtomcrypt.org | 15 * Tom St Denis, [email protected], http://math.libtomcrypt.org |
14 */ | 16 */ |
15 #include <tommath.h> | |
16 | 17 |
17 /* Fast (comba) multiplier | 18 /* Fast (comba) multiplier |
18 * | 19 * |
19 * This is the fast column-array [comba] multiplier. It is | 20 * This is the fast column-array [comba] multiplier. It is |
20 * designed to compute the columns of the product first | 21 * designed to compute the columns of the product first |
31 * | 32 * |
32 */ | 33 */ |
33 int | 34 int |
34 fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) | 35 fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) |
35 { | 36 { |
36 int olduse, res, pa, ix; | 37 int olduse, res, pa, ix, iz; |
37 mp_word W[MP_WARRAY]; | 38 mp_digit W[MP_WARRAY]; |
39 register mp_word _W; | |
38 | 40 |
39 /* grow the destination as required */ | 41 /* grow the destination as required */ |
40 if (c->alloc < digs) { | 42 if (c->alloc < digs) { |
41 if ((res = mp_grow (c, digs)) != MP_OKAY) { | 43 if ((res = mp_grow (c, digs)) != MP_OKAY) { |
42 return res; | 44 return res; |
43 } | 45 } |
44 } | 46 } |
45 | 47 |
46 /* clear temp buf (the columns) */ | 48 /* number of output digits to produce */ |
47 memset (W, 0, sizeof (mp_word) * digs); | 49 pa = MIN(digs, a->used + b->used); |
48 | 50 |
49 /* calculate the columns */ | 51 /* clear the carry */ |
50 pa = a->used; | 52 _W = 0; |
51 for (ix = 0; ix < pa; ix++) { | 53 for (ix = 0; ix <= pa; ix++) { |
52 /* this multiplier has been modified to allow you to | 54 int tx, ty; |
53 * control how many digits of output are produced. | 55 int iy; |
54 * So at most we want to make upto "digs" digits of output. | 56 mp_digit *tmpx, *tmpy; |
55 * | |
56 * this adds products to distinct columns (at ix+iy) of W | |
57 * note that each step through the loop is not dependent on | |
58 * the previous which means the compiler can easily unroll | |
59 * the loop without scheduling problems | |
60 */ | |
61 { | |
62 register mp_digit tmpx, *tmpy; | |
63 register mp_word *_W; | |
64 register int iy, pb; | |
65 | 57 |
66 /* alias for the the word on the left e.g. A[ix] * A[iy] */ | 58 /* get offsets into the two bignums */ |
67 tmpx = a->dp[ix]; | 59 ty = MIN(b->used-1, ix); |
60 tx = ix - ty; | |
68 | 61 |
69 /* alias for the right side */ | 62 /* setup temp aliases */ |
70 tmpy = b->dp; | 63 tmpx = a->dp + tx; |
64 tmpy = b->dp + ty; | |
71 | 65 |
72 /* alias for the columns, each step through the loop adds a new | 66 /* this is the number of times the loop will iterrate, essentially its |
73 term to each column | 67 while (tx++ < a->used && ty-- >= 0) { ... } |
74 */ | 68 */ |
75 _W = W + ix; | 69 iy = MIN(a->used-tx, ty+1); |
76 | 70 |
77 /* the number of digits is limited by their placement. E.g. | 71 /* execute loop */ |
78 we avoid multiplying digits that will end up above the # of | 72 for (iz = 0; iz < iy; ++iz) { |
79 digits of precision requested | 73 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); |
80 */ | 74 } |
81 pb = MIN (b->used, digs - ix); | |
82 | 75 |
83 for (iy = 0; iy < pb; iy++) { | 76 /* store term */ |
84 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); | 77 W[ix] = ((mp_digit)_W) & MP_MASK; |
85 } | |
86 } | |
87 | 78 |
79 /* make next carry */ | |
80 _W = _W >> ((mp_word)DIGIT_BIT); | |
88 } | 81 } |
89 | 82 |
90 /* setup dest */ | 83 /* setup dest */ |
91 olduse = c->used; | 84 olduse = c->used; |
92 c->used = digs; | 85 c->used = digs; |
93 | 86 |
94 { | 87 { |
95 register mp_digit *tmpc; | 88 register mp_digit *tmpc; |
96 | |
97 /* At this point W[] contains the sums of each column. To get the | |
98 * correct result we must take the extra bits from each column and | |
99 * carry them down | |
100 * | |
101 * Note that while this adds extra code to the multiplier it | |
102 * saves time since the carry propagation is removed from the | |
103 * above nested loop.This has the effect of reducing the work | |
104 * from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the | |
105 * cost of the shifting. On very small numbers this is slower | |
106 * but on most cryptographic size numbers it is faster. | |
107 * | |
108 * In this particular implementation we feed the carries from | |
109 * behind which means when the loop terminates we still have one | |
110 * last digit to copy | |
111 */ | |
112 tmpc = c->dp; | 89 tmpc = c->dp; |
113 for (ix = 1; ix < digs; ix++) { | 90 for (ix = 0; ix < digs; ix++) { |
114 /* forward the carry from the previous temp */ | |
115 W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT)); | |
116 | |
117 /* now extract the previous digit [below the carry] */ | 91 /* now extract the previous digit [below the carry] */ |
118 *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); | 92 *tmpc++ = W[ix]; |
119 } | 93 } |
120 /* fetch the last digit */ | |
121 *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK)); | |
122 | 94 |
123 /* clear unused digits [that existed in the old copy of c] */ | 95 /* clear unused digits [that existed in the old copy of c] */ |
124 for (; ix < olduse; ix++) { | 96 for (; ix < olduse; ix++) { |
125 *tmpc++ = 0; | 97 *tmpc++ = 0; |
126 } | 98 } |
127 } | 99 } |
128 mp_clamp (c); | 100 mp_clamp (c); |
129 return MP_OKAY; | 101 return MP_OKAY; |
130 } | 102 } |
103 #endif |