dropbear: bn_fast_s_mp_sqr.c annotate

annotate bn_fast_s_mp_sqr.c @ 142:d29b64170cf0 libtommath-orig

import of libtommath 0.32

author	Matt Johnston <matt@ucc.asn.au>
date	Sun, 19 Dec 2004 11:33:56 +0000
parents	86e0b50a9b58
children	d8254fc979e9

rev	line source
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	1 #include <tommath.h>
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	2 #ifdef BN_FAST_S_MP_SQR_C
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	3 /* LibTomMath, multiple-precision integer library -- Tom St Denis
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	4 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	5 * LibTomMath is a library that provides multiple-precision
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	6 * integer arithmetic as well as number theoretic functionality.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	7 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	8 * The library was designed directly after the MPI library by
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	9 * Michael Fromberger but has been written from scratch with
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	10 * additional optimizations in place.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	11 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	12 * The library is free for all purposes without any express
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	13 * guarantee it works.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	14 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	15 * Tom St Denis, [email protected], http://math.libtomcrypt.org
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	16 */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	17
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	18 /* fast squaring
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	19 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	20 * This is the comba method where the columns of the product
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	21 * are computed first then the carries are computed. This
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	22 * has the effect of making a very simple inner loop that
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	23 * is executed the most
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	24 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	25 * W2 represents the outer products and W the inner.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	26 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	27 * A further optimization is made because the inner
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	28 * products are of the form "A * B * 2". The *2 part does
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	29 * not need to be computed until the end which is good
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	30 * because 64-bit shifts are slow!
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	31 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	32 * Based on Algorithm 14.16 on pp.597 of HAC.
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	33 *
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	34 */
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	35 /* the gist of squaring...
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	36
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	37 you do like mult except the offset of the tmpx [one that starts closer to zero]
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	38 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	39 (ty-tx) so that it never happens. You double all those you add in the inner loop
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	40
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	41 After that loop you do the squares and add them in.
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	42
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	43 Remove W2 and don't memset W
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	44
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	45 */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	46
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	47 int fast_s_mp_sqr (mp_int * a, mp_int * b)
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	48 {
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	49 int olduse, res, pa, ix, iz;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	50 mp_digit W[MP_WARRAY], *tmpx;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	51 mp_word W1;
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	52
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	53 /* grow the destination as required */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	54 pa = a->used + a->used;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	55 if (b->alloc < pa) {
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	56 if ((res = mp_grow (b, pa)) != MP_OKAY) {
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	57 return res;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	58 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	59 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	60
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	61 /* number of output digits to produce */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	62 W1 = 0;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	63 for (ix = 0; ix <= pa; ix++) {
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	64 int tx, ty, iy;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	65 mp_word _W;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	66 mp_digit *tmpy;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	67
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	68 /* clear counter */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	69 _W = 0;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	70
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	71 /* get offsets into the two bignums */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	72 ty = MIN(a->used-1, ix);
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	73 tx = ix - ty;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	74
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	75 /* setup temp aliases */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	76 tmpx = a->dp + tx;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	77 tmpy = a->dp + ty;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	78
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	79 /* this is the number of times the loop will iterrate, essentially its
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	80 while (tx++ < a->used && ty-- >= 0) { ... }
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	81 */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	82 iy = MIN(a->used-tx, ty+1);
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	83
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	84 /* now for squaring tx can never equal ty
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	85 * we halve the distance since they approach at a rate of 2x
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	86 * and we have to round because odd cases need to be executed
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	87 */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	88 iy = MIN(iy, (ty-tx+1)>>1);
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	89
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	90 /* execute loop */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	91 for (iz = 0; iz < iy; iz++) {
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	92 _W += ((mp_word)tmpx++)((mp_word)*tmpy--);
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	93 }
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	94
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	95 /* double the inner product and add carry */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	96 _W = _W + _W + W1;
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	97
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	98 /* even columns have the square term in them */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	99 if ((ix&1) == 0) {
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	100 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	101 }
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	102
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	103 /* store it */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	104 W[ix] = _W;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	105
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	106 /* make next carry */
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	107 W1 = _W >> ((mp_word)DIGIT_BIT);
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	108 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	109
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	110 /* setup dest */
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	111 olduse = b->used;
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	112 b->used = a->used+a->used;
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	113
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	114 {
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	115 mp_digit *tmpb;
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	116 tmpb = b->dp;
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	117 for (ix = 0; ix < pa; ix++) {
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	118 *tmpb++ = W[ix] & MP_MASK;
d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	119 }
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	120
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	121 /* clear unused digits [that existed in the old copy of c] */
2 86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	122 for (; ix < olduse; ix++) {
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	123 *tmpb++ = 0;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	124 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	125 }
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	126 mp_clamp (b);
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	127 return MP_OKAY;
86e0b50a9b58 ltm 0.30 orig import Matt Johnston <matt@ucc.asn.au> parents: diff changeset	128 }
142 d29b64170cf0 import of libtommath 0.32 Matt Johnston <matt@ucc.asn.au> parents: 2 diff changeset	129 #endif

Mercurial > dropbear

annotate bn_fast_s_mp_sqr.c @ 142:d29b64170cf0 libtommath-orig