dropbear: bn_fast_s_mp_mul

annotate bn_fast_s_mp_mul_digs.c @ 2:86e0b50a9b58 libtommath-orig ltm-0.30-orig

ltm 0.30 orig import

author	Matt Johnston <matt@ucc.asn.au>
date	Mon, 31 May 2004 18:25:22 +0000
parents
children	d29b64170cf0

rev	line source
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	1 /* LibTomMath, multiple-precision integer library -- Tom St Denis
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	2 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	3 * LibTomMath is a library that provides multiple-precision
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	4 * integer arithmetic as well as number theoretic functionality.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	5 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	6 * The library was designed directly after the MPI library by
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	7 * Michael Fromberger but has been written from scratch with
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	8 * additional optimizations in place.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	9 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	10 * The library is free for all purposes without any express
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	11 * guarantee it works.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	12 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	13 * Tom St Denis, [email protected], http://math.libtomcrypt.org
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	14 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	15 #include <tommath.h>
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	16
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	17 /* Fast (comba) multiplier
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	18 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	19 * This is the fast column-array [comba] multiplier. It is
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	20 * designed to compute the columns of the product first
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	21 * then handle the carries afterwards. This has the effect
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	22 * of making the nested loops that compute the columns very
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	23 * simple and schedulable on super-scalar processors.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	24 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	25 * This has been modified to produce a variable number of
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	26 * digits of output so if say only a half-product is required
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	27 * you don't have to compute the upper half (a feature
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	28 * required for fast Barrett reduction).
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	29 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	30 * Based on Algorithm 14.12 on pp.595 of HAC.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	31 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	32 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	33 int
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	34 fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	35 {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	36 int olduse, res, pa, ix;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	37 mp_word W[MP_WARRAY];
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	38
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	39 /* grow the destination as required */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	40 if (c->alloc < digs) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	41 if ((res = mp_grow (c, digs)) != MP_OKAY) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	42 return res;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	43 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	44 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	45
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	46 /* clear temp buf (the columns) */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	47 memset (W, 0, sizeof (mp_word) * digs);
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	48
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	49 /* calculate the columns */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	50 pa = a->used;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	51 for (ix = 0; ix < pa; ix++) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	52 /* this multiplier has been modified to allow you to
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	53 * control how many digits of output are produced.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	54 * So at most we want to make up to "digs" digits of output.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	55 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	56 * this adds products to distinct columns (at ix+iy) of W
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	57 * note that each step through the loop is not dependent on
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	58 * the previous which means the compiler can easily unroll
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	59 * the loop without scheduling problems
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	60 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	61 {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	62 register mp_digit tmpx, *tmpy;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	63 register mp_word *_W;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	64 register int iy, pb;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	65
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	66 /* alias for the word on the left e.g. A[ix] * B[iy] */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	67 tmpx = a->dp[ix];
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	68
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	69 /* alias for the right side */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	70 tmpy = b->dp;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	71
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	72 /* alias for the columns, each step through the loop adds a new
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	73 term to each column
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	74 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	75 _W = W + ix;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	76
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	77 /* the number of digits is limited by their placement. E.g.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	78 we avoid multiplying digits that will end up above the # of
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	79 digits of precision requested
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	80 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	81 pb = MIN (b->used, digs - ix);
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	82
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	83 for (iy = 0; iy < pb; iy++) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	84 *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++);
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	85 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	86 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	87
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	88 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	89
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	90 /* setup dest */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	91 olduse = c->used;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	92 c->used = digs;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	93
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	94 {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	95 register mp_digit *tmpc;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	96
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	97 /* At this point W[] contains the sums of each column. To get the
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	98 * correct result we must take the extra bits from each column and
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	99 * carry them down
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	100 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	101 * Note that while this adds extra code to the multiplier it
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	102 * saves time since the carry propagation is removed from the
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	103 * above nested loop. This has the effect of reducing the work
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	104 * from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	105 * cost of the shifting. On very small numbers this is slower
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	106 * but on most cryptographic size numbers it is faster.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	107 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	108 * In this particular implementation we feed the carries from
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	109 * behind which means when the loop terminates we still have one
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	110 * last digit to copy
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	111 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	112 tmpc = c->dp;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	113 for (ix = 1; ix < digs; ix++) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	114 /* forward the carry from the previous temp */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	115 W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT));
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	116
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	117 /* now extract the previous digit [below the carry] */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	118 *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	119 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	120 /* fetch the last digit */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	121 *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK));
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	122
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	123 /* clear unused digits [that existed in the old copy of c] */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	124 for (; ix < olduse; ix++) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	125 *tmpc++ = 0;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	126 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	127 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	128 mp_clamp (c);
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	129 return MP_OKAY;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	130 }

Mercurial > dropbear

annotate bn_fast_s_mp_mul_digs.c @ 2:86e0b50a9b58 libtommath-orig ltm-0.30-orig