282
|
1 #include <tommath.h> |
|
2 #ifdef BN_FAST_S_MP_MUL_DIGS_C |
|
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
|
4 * |
|
5 * LibTomMath is a library that provides multiple-precision |
|
6 * integer arithmetic as well as number theoretic functionality. |
|
7 * |
|
8 * The library was designed directly after the MPI library by |
|
9 * Michael Fromberger but has been written from scratch with |
|
10 * additional optimizations in place. |
|
11 * |
|
12 * The library is free for all purposes without any express |
|
13 * guarantee it works. |
|
14 * |
|
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org |
|
16 */ |
|
17 |
|
18 /* Fast (comba) multiplier |
|
19 * |
|
20 * This is the fast column-array [comba] multiplier. It is |
|
21 * designed to compute the columns of the product first |
|
22 * then handle the carries afterwards. This has the effect |
|
23 * of making the nested loops that compute the columns very |
|
24 * simple and schedulable on super-scalar processors. |
|
25 * |
|
26 * This has been modified to produce a variable number of |
|
27 * digits of output so if say only a half-product is required |
|
28 * you don't have to compute the upper half (a feature |
|
29 * required for fast Barrett reduction). |
|
30 * |
|
31 * Based on Algorithm 14.12 on pp.595 of HAC. |
|
32 * |
|
33 */ |
|
34 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) |
|
35 { |
|
36 int olduse, res, pa, ix, iz; |
|
37 mp_digit W[MP_WARRAY]; |
|
38 register mp_word _W; |
|
39 |
|
40 /* grow the destination as required */ |
|
41 if (c->alloc < digs) { |
|
42 if ((res = mp_grow (c, digs)) != MP_OKAY) { |
|
43 return res; |
|
44 } |
|
45 } |
|
46 |
|
47 /* number of output digits to produce */ |
|
48 pa = MIN(digs, a->used + b->used); |
|
49 |
|
50 /* clear the carry */ |
|
51 _W = 0; |
|
52 for (ix = 0; ix < pa; ix++) { |
|
53 int tx, ty; |
|
54 int iy; |
|
55 mp_digit *tmpx, *tmpy; |
|
56 |
|
57 /* get offsets into the two bignums */ |
|
58 ty = MIN(b->used-1, ix); |
|
59 tx = ix - ty; |
|
60 |
|
61 /* setup temp aliases */ |
|
62 tmpx = a->dp + tx; |
|
63 tmpy = b->dp + ty; |
|
64 |
|
65 /* this is the number of times the loop will iterrate, essentially |
|
66 while (tx++ < a->used && ty-- >= 0) { ... } |
|
67 */ |
|
68 iy = MIN(a->used-tx, ty+1); |
|
69 |
|
70 /* execute loop */ |
|
71 for (iz = 0; iz < iy; ++iz) { |
|
72 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); |
|
73 } |
|
74 |
|
75 /* store term */ |
|
76 W[ix] = ((mp_digit)_W) & MP_MASK; |
|
77 |
|
78 /* make next carry */ |
|
79 _W = _W >> ((mp_word)DIGIT_BIT); |
|
80 } |
|
81 |
|
82 /* store final carry */ |
|
83 W[ix] = (mp_digit)(_W & MP_MASK); |
|
84 |
|
85 /* setup dest */ |
|
86 olduse = c->used; |
|
87 c->used = pa; |
|
88 |
|
89 { |
|
90 register mp_digit *tmpc; |
|
91 tmpc = c->dp; |
|
92 for (ix = 0; ix < pa+1; ix++) { |
|
93 /* now extract the previous digit [below the carry] */ |
|
94 *tmpc++ = W[ix]; |
|
95 } |
|
96 |
|
97 /* clear unused digits [that existed in the old copy of c] */ |
|
98 for (; ix < olduse; ix++) { |
|
99 *tmpc++ = 0; |
|
100 } |
|
101 } |
|
102 mp_clamp (c); |
|
103 return MP_OKAY; |
|
104 } |
|
105 #endif |