annotate bn_fast_s_mp_mul_digs.c @ 2:86e0b50a9b58 libtommath-orig ltm-0.30-orig

ltm 0.30 orig import
author Matt Johnston <matt@ucc.asn.au>
date Mon, 31 May 2004 18:25:22 +0000
parents
children d29b64170cf0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
1 /* LibTomMath, multiple-precision integer library -- Tom St Denis
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
2 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
3 * LibTomMath is a library that provides multiple-precision
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
4 * integer arithmetic as well as number theoretic functionality.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
5 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
6 * The library was designed directly after the MPI library by
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
7 * Michael Fromberger but has been written from scratch with
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
8 * additional optimizations in place.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
9 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
10 * The library is free for all purposes without any express
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
11 * guarantee it works.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
12 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
13 * Tom St Denis, [email protected], http://math.libtomcrypt.org
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
14 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
15 #include <tommath.h>
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
16
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
17 /* Fast (comba) multiplier
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
18 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
19 * This is the fast column-array [comba] multiplier. It is
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
20 * designed to compute the columns of the product first
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
21 * then handle the carries afterwards. This has the effect
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
22 * of making the nested loops that compute the columns very
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
23 * simple and schedulable on super-scalar processors.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
24 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
25 * This has been modified to produce a variable number of
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
26 * digits of output so if say only a half-product is required
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
27 * you don't have to compute the upper half (a feature
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
28 * required for fast Barrett reduction).
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
29 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
30 * Based on Algorithm 14.12 on pp.595 of HAC.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
31 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
32 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
33 int
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
34 fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
35 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
36 int olduse, res, pa, ix;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
37 mp_word W[MP_WARRAY];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
38
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
39 /* grow the destination as required */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
40 if (c->alloc < digs) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
41 if ((res = mp_grow (c, digs)) != MP_OKAY) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
42 return res;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
43 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
44 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
45
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
46 /* clear temp buf (the columns) */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
47 memset (W, 0, sizeof (mp_word) * digs);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
48
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
49 /* calculate the columns */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
50 pa = a->used;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
51 for (ix = 0; ix < pa; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
52 /* this multiplier has been modified to allow you to
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
53 * control how many digits of output are produced.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
54 * So at most we want to make upto "digs" digits of output.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
55 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
56 * this adds products to distinct columns (at ix+iy) of W
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
57 * note that each step through the loop is not dependent on
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
58 * the previous which means the compiler can easily unroll
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
59 * the loop without scheduling problems
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
60 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
61 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
62 register mp_digit tmpx, *tmpy;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
63 register mp_word *_W;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
64 register int iy, pb;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
65
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
66 /* alias for the the word on the left e.g. A[ix] * A[iy] */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
67 tmpx = a->dp[ix];
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
68
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
69 /* alias for the right side */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
70 tmpy = b->dp;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
71
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
72 /* alias for the columns, each step through the loop adds a new
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
73 term to each column
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
74 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
75 _W = W + ix;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
76
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
77 /* the number of digits is limited by their placement. E.g.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
78 we avoid multiplying digits that will end up above the # of
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
79 digits of precision requested
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
80 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
81 pb = MIN (b->used, digs - ix);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
82
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
83 for (iy = 0; iy < pb; iy++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
84 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
85 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
86 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
87
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
88 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
89
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
90 /* setup dest */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
91 olduse = c->used;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
92 c->used = digs;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
93
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
94 {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
95 register mp_digit *tmpc;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
96
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
97 /* At this point W[] contains the sums of each column. To get the
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
98 * correct result we must take the extra bits from each column and
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
99 * carry them down
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
100 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
101 * Note that while this adds extra code to the multiplier it
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
102 * saves time since the carry propagation is removed from the
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
103 * above nested loop.This has the effect of reducing the work
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
104 * from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
105 * cost of the shifting. On very small numbers this is slower
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
106 * but on most cryptographic size numbers it is faster.
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
107 *
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
108 * In this particular implementation we feed the carries from
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
109 * behind which means when the loop terminates we still have one
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
110 * last digit to copy
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
111 */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
112 tmpc = c->dp;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
113 for (ix = 1; ix < digs; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
114 /* forward the carry from the previous temp */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
115 W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
116
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
117 /* now extract the previous digit [below the carry] */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
118 *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
119 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
120 /* fetch the last digit */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
121 *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
122
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
123 /* clear unused digits [that existed in the old copy of c] */
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
124 for (; ix < olduse; ix++) {
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
125 *tmpc++ = 0;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
126 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
127 }
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
128 mp_clamp (c);
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
129 return MP_OKAY;
86e0b50a9b58 ltm 0.30 orig import
Matt Johnston <matt@ucc.asn.au>
parents:
diff changeset
130 }