Mercurial > dropbear
diff bn_fast_s_mp_mul_digs.c @ 142:d29b64170cf0 libtommath-orig
import of libtommath 0.32
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Sun, 19 Dec 2004 11:33:56 +0000 |
parents | 86e0b50a9b58 |
children | d8254fc979e9 |
line wrap: on
line diff
--- a/bn_fast_s_mp_mul_digs.c Tue Jun 15 14:42:57 2004 +0000 +++ b/bn_fast_s_mp_mul_digs.c Sun Dec 19 11:33:56 2004 +0000 @@ -1,3 +1,5 @@ +#include <tommath.h> +#ifdef BN_FAST_S_MP_MUL_DIGS_C /* LibTomMath, multiple-precision integer library -- Tom St Denis * * LibTomMath is a library that provides multiple-precision @@ -12,7 +14,6 @@ * * Tom St Denis, [email protected], http://math.libtomcrypt.org */ -#include <tommath.h> /* Fast (comba) multiplier * @@ -33,8 +34,9 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) { - int olduse, res, pa, ix; - mp_word W[MP_WARRAY]; + int olduse, res, pa, ix, iz; + mp_digit W[MP_WARRAY]; + register mp_word _W; /* grow the destination as required */ if (c->alloc < digs) { @@ -43,82 +45,52 @@ } } - /* clear temp buf (the columns) */ - memset (W, 0, sizeof (mp_word) * digs); + /* number of output digits to produce */ + pa = MIN(digs, a->used + b->used); - /* calculate the columns */ - pa = a->used; - for (ix = 0; ix < pa; ix++) { - /* this multiplier has been modified to allow you to - * control how many digits of output are produced. - * So at most we want to make upto "digs" digits of output. - * - * this adds products to distinct columns (at ix+iy) of W - * note that each step through the loop is not dependent on - * the previous which means the compiler can easily unroll - * the loop without scheduling problems - */ - { - register mp_digit tmpx, *tmpy; - register mp_word *_W; - register int iy, pb; + /* clear the carry */ + _W = 0; + for (ix = 0; ix <= pa; ix++) { + int tx, ty; + int iy; + mp_digit *tmpx, *tmpy; + + /* get offsets into the two bignums */ + ty = MIN(b->used-1, ix); + tx = ix - ty; - /* alias for the the word on the left e.g. A[ix] * A[iy] */ - tmpx = a->dp[ix]; + /* setup temp aliases */ + tmpx = a->dp + tx; + tmpy = b->dp + ty; - /* alias for the right side */ - tmpy = b->dp; - - /* alias for the columns, each step through the loop adds a new - term to each column + /* this is the number of times the loop will iterrate, essentially its + while (tx++ < a->used && ty-- >= 0) { ... } */ - _W = W + ix; + iy = MIN(a->used-tx, ty+1); - /* the number of digits is limited by their placement. E.g. - we avoid multiplying digits that will end up above the # of - digits of precision requested - */ - pb = MIN (b->used, digs - ix); + /* execute loop */ + for (iz = 0; iz < iy; ++iz) { + _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } - for (iy = 0; iy < pb; iy++) { - *_W++ += ((mp_word)tmpx) * ((mp_word)*tmpy++); - } - } + /* store term */ + W[ix] = ((mp_digit)_W) & MP_MASK; + /* make next carry */ + _W = _W >> ((mp_word)DIGIT_BIT); } /* setup dest */ - olduse = c->used; + olduse = c->used; c->used = digs; { register mp_digit *tmpc; - - /* At this point W[] contains the sums of each column. To get the - * correct result we must take the extra bits from each column and - * carry them down - * - * Note that while this adds extra code to the multiplier it - * saves time since the carry propagation is removed from the - * above nested loop.This has the effect of reducing the work - * from N*(N+N*c)==N**2 + c*N**2 to N**2 + N*c where c is the - * cost of the shifting. On very small numbers this is slower - * but on most cryptographic size numbers it is faster. - * - * In this particular implementation we feed the carries from - * behind which means when the loop terminates we still have one - * last digit to copy - */ tmpc = c->dp; - for (ix = 1; ix < digs; ix++) { - /* forward the carry from the previous temp */ - W[ix] += (W[ix - 1] >> ((mp_word) DIGIT_BIT)); - + for (ix = 0; ix < digs; ix++) { /* now extract the previous digit [below the carry] */ - *tmpc++ = (mp_digit) (W[ix - 1] & ((mp_word) MP_MASK)); + *tmpc++ = W[ix]; } - /* fetch the last digit */ - *tmpc++ = (mp_digit) (W[digs - 1] & ((mp_word) MP_MASK)); /* clear unused digits [that existed in the old copy of c] */ for (; ix < olduse; ix++) { @@ -128,3 +100,4 @@ mp_clamp (c); return MP_OKAY; } +#endif