Mercurial > dropbear
diff bn_fast_s_mp_sqr.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35
Initial import of libtommath 0.35
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Fri, 06 May 2005 08:59:30 +0000 |
parents | d29b64170cf0 |
children |
line wrap: on
line diff
--- a/bn_fast_s_mp_sqr.c Sun Dec 19 11:33:56 2004 +0000 +++ b/bn_fast_s_mp_sqr.c Fri May 06 08:59:30 2005 +0000 @@ -15,33 +15,14 @@ * Tom St Denis, [email protected], http://math.libtomcrypt.org */ -/* fast squaring - * - * This is the comba method where the columns of the product - * are computed first then the carries are computed. This - * has the effect of making a very simple inner loop that - * is executed the most - * - * W2 represents the outer products and W the inner. - * - * A further optimizations is made because the inner - * products are of the form "A * B * 2". The *2 part does - * not need to be computed until the end which is good - * because 64-bit shifts are slow! - * - * Based on Algorithm 14.16 on pp.597 of HAC. - * - */ /* the jist of squaring... - -you do like mult except the offset of the tmpx [one that starts closer to zero] -can't equal the offset of tmpy. So basically you set up iy like before then you min it with -(ty-tx) so that it never happens. You double all those you add in the inner loop + * you do like mult except the offset of the tmpx [one that + * starts closer to zero] can't equal the offset of tmpy. + * So basically you set up iy like before then you min it with + * (ty-tx) so that it never happens. You double all those + * you add in the inner loop After that loop you do the squares and add them in. - -Remove W2 and don't memset W - */ int fast_s_mp_sqr (mp_int * a, mp_int * b) @@ -60,7 +41,7 @@ /* number of output digits to produce */ W1 = 0; - for (ix = 0; ix <= pa; ix++) { + for (ix = 0; ix < pa; ix++) { int tx, ty, iy; mp_word _W; mp_digit *tmpy; @@ -76,7 +57,7 @@ tmpx = a->dp + tx; tmpy = a->dp + ty; - /* this is the number of times the loop will iterrate, essentially its + /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ iy = MIN(a->used-tx, ty+1); @@ -101,7 +82,7 @@ } /* store it */ - W[ix] = _W; + W[ix] = (mp_digit)(_W & MP_MASK); /* make next carry */ W1 = _W >> ((mp_word)DIGIT_BIT);