Mercurial > dropbear
diff pre_gen/mpi.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35
Initial import of libtommath 0.35
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Fri, 06 May 2005 08:59:30 +0000 |
parents | d29b64170cf0 |
children |
line wrap: on
line diff
--- a/pre_gen/mpi.c Sun Dec 19 11:33:56 2004 +0000 +++ b/pre_gen/mpi.c Fri May 06 08:59:30 2005 +0000 @@ -69,8 +69,7 @@ * Based on slow invmod except this is optimized for the case where b is * odd as per HAC Note 14.64 on pp. 610 */ -int -fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) +int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) { mp_int x, y, u, v, B, D; int res, neg; @@ -87,20 +86,20 @@ /* x == modulus, y == value to invert */ if ((res = mp_copy (b, &x)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* we need y = |a| */ - if ((res = mp_abs (a, &y)) != MP_OKAY) { - goto __ERR; + if ((res = mp_mod (a, b, &y)) != MP_OKAY) { + goto LBL_ERR; } /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ if ((res = mp_copy (&x, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_copy (&y, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } mp_set (&D, 1); @@ -109,17 +108,17 @@ while (mp_iseven (&u) == 1) { /* 4.1 u = u/2 */ if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* 4.2 if B is odd then */ if (mp_isodd (&B) == 1) { if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* B = B/2 */ if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -127,18 +126,18 @@ while (mp_iseven (&v) == 1) { /* 5.1 v = v/2 */ if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* 5.2 if D is odd then */ if (mp_isodd (&D) == 1) { /* D = (D-x)/2 */ if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* D = D/2 */ if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -146,20 +145,20 @@ if (mp_cmp (&u, &v) != MP_LT) { /* u = u - v, B = B - D */ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } else { /* v - v - u, D = D - B */ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -173,21 +172,21 @@ /* if v != 1 then there is no inverse */ if (mp_cmp_d (&v, 1) != MP_EQ) { res = MP_VAL; - goto __ERR; + goto LBL_ERR; } /* b is now the inverse */ neg = a->sign; while (D.sign == MP_NEG) { if ((res = mp_add (&D, b, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } mp_exch (&D, c); c->sign = neg; res = MP_OKAY; -__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); +LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); return res; } #endif @@ -220,8 +219,7 @@ * * Based on Algorithm 14.32 on pp.601 of HAC. */ -int -fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) +int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) { int ix, res, olduse; mp_word W[MP_WARRAY]; @@ -401,8 +399,7 @@ * Based on Algorithm 14.12 on pp.595 of HAC. * */ -int -fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) { int olduse, res, pa, ix, iz; mp_digit W[MP_WARRAY]; @@ -420,7 +417,7 @@ /* clear the carry */ _W = 0; - for (ix = 0; ix <= pa; ix++) { + for (ix = 0; ix < pa; ix++) { int tx, ty; int iy; mp_digit *tmpx, *tmpy; @@ -433,7 +430,7 @@ tmpx = a->dp + tx; tmpy = b->dp + ty; - /* this is the number of times the loop will iterrate, essentially its + /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ iy = MIN(a->used-tx, ty+1); @@ -450,14 +447,17 @@ _W = _W >> ((mp_word)DIGIT_BIT); } + /* store final carry */ + W[ix] = (mp_digit)(_W & MP_MASK); + /* setup dest */ olduse = c->used; - c->used = digs; + c->used = pa; { register mp_digit *tmpc; tmpc = c->dp; - for (ix = 0; ix < digs; ix++) { + for (ix = 0; ix < pa+1; ix++) { /* now extract the previous digit [below the carry] */ *tmpc++ = W[ix]; } @@ -501,8 +501,7 @@ * * Based on Algorithm 14.12 on pp.595 of HAC. */ -int -fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) { int olduse, res, pa, ix, iz; mp_digit W[MP_WARRAY]; @@ -519,7 +518,7 @@ /* number of output digits to produce */ pa = a->used + b->used; _W = 0; - for (ix = digs; ix <= pa; ix++) { + for (ix = digs; ix < pa; ix++) { int tx, ty, iy; mp_digit *tmpx, *tmpy; @@ -547,6 +546,9 @@ /* make next carry */ _W = _W >> ((mp_word)DIGIT_BIT); } + + /* store final carry */ + W[ix] = (mp_digit)(_W & MP_MASK); /* setup dest */ olduse = c->used; @@ -591,33 +593,14 @@ * Tom St Denis, [email protected], http://math.libtomcrypt.org */ -/* fast squaring - * - * This is the comba method where the columns of the product - * are computed first then the carries are computed. This - * has the effect of making a very simple inner loop that - * is executed the most - * - * W2 represents the outer products and W the inner. - * - * A further optimizations is made because the inner - * products are of the form "A * B * 2". The *2 part does - * not need to be computed until the end which is good - * because 64-bit shifts are slow! - * - * Based on Algorithm 14.16 on pp.597 of HAC. - * - */ /* the jist of squaring... - -you do like mult except the offset of the tmpx [one that starts closer to zero] -can't equal the offset of tmpy. So basically you set up iy like before then you min it with -(ty-tx) so that it never happens. You double all those you add in the inner loop + * you do like mult except the offset of the tmpx [one that + * starts closer to zero] can't equal the offset of tmpy. + * So basically you set up iy like before then you min it with + * (ty-tx) so that it never happens. You double all those + * you add in the inner loop After that loop you do the squares and add them in. - -Remove W2 and don't memset W - */ int fast_s_mp_sqr (mp_int * a, mp_int * b) @@ -636,7 +619,7 @@ /* number of output digits to produce */ W1 = 0; - for (ix = 0; ix <= pa; ix++) { + for (ix = 0; ix < pa; ix++) { int tx, ty, iy; mp_word _W; mp_digit *tmpy; @@ -652,7 +635,7 @@ tmpx = a->dp + tx; tmpy = a->dp + ty; - /* this is the number of times the loop will iterrate, essentially its + /* this is the number of times the loop will iterrate, essentially while (tx++ < a->used && ty-- >= 0) { ... } */ iy = MIN(a->used-tx, ty+1); @@ -677,7 +660,7 @@ } /* store it */ - W[ix] = _W; + W[ix] = (mp_digit)(_W & MP_MASK); /* make next carry */ W1 = _W >> ((mp_word)DIGIT_BIT); @@ -1539,23 +1522,23 @@ mp_set(&tq, 1); n = mp_count_bits(a) - mp_count_bits(b); - if (((res = mp_copy(a, &ta)) != MP_OKAY) || - ((res = mp_copy(b, &tb)) != MP_OKAY) || + if (((res = mp_abs(a, &ta)) != MP_OKAY) || + ((res = mp_abs(b, &tb)) != MP_OKAY) || ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) || ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) { - goto __ERR; + goto LBL_ERR; } while (n-- >= 0) { if (mp_cmp(&tb, &ta) != MP_GT) { if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) || ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) { - goto __ERR; + goto LBL_ERR; } } if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) || ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) { - goto __ERR; + goto LBL_ERR; } } @@ -1564,13 +1547,13 @@ n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG); if (c != NULL) { mp_exch(c, &q); - c->sign = n2; + c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2; } if (d != NULL) { mp_exch(d, &ta); - d->sign = n; - } -__ERR: + d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n; + } +LBL_ERR: mp_clear_multi(&ta, &tb, &tq, &q, NULL); return res; } @@ -1619,19 +1602,19 @@ q.used = a->used + 2; if ((res = mp_init (&t1)) != MP_OKAY) { - goto __Q; + goto LBL_Q; } if ((res = mp_init (&t2)) != MP_OKAY) { - goto __T1; + goto LBL_T1; } if ((res = mp_init_copy (&x, a)) != MP_OKAY) { - goto __T2; + goto LBL_T2; } if ((res = mp_init_copy (&y, b)) != MP_OKAY) { - goto __X; + goto LBL_X; } /* fix the sign */ @@ -1643,10 +1626,10 @@ if (norm < (int)(DIGIT_BIT-1)) { norm = (DIGIT_BIT-1) - norm; if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } } else { norm = 0; @@ -1658,13 +1641,13 @@ /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ - goto __Y; + goto LBL_Y; } while (mp_cmp (&x, &y) != MP_LT) { ++(q.dp[n - t]); if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } } @@ -1706,7 +1689,7 @@ t1.dp[1] = y.dp[t]; t1.used = 2; if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } /* find right hand */ @@ -1718,27 +1701,27 @@ /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ if (x.sign == MP_NEG) { if ((res = mp_copy (&y, &t1)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; @@ -1765,11 +1748,11 @@ res = MP_OKAY; -__Y:mp_clear (&y); -__X:mp_clear (&x); -__T2:mp_clear (&t2); -__T1:mp_clear (&t1); -__Q:mp_clear (&q); +LBL_Y:mp_clear (&y); +LBL_X:mp_clear (&x); +LBL_T2:mp_clear (&t2); +LBL_T1:mp_clear (&t1); +LBL_Q:mp_clear (&q); return res; } @@ -2199,7 +2182,7 @@ * Based on algorithm from the paper * * "Generating Efficient Primes for Discrete Log Cryptosystems" - * Chae Hoon Lim, Pil Loong Lee, + * Chae Hoon Lim, Pil Joong Lee, * POSTECH Information Research Laboratories * * The modulus must be of a special format [see manual] @@ -2457,25 +2440,33 @@ return err; #else /* no invmod */ - return MP_VAL -#endif - } + return MP_VAL; +#endif + } + +/* modified diminished radix reduction */ +#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) + if (mp_reduce_is_2k_l(P) == MP_YES) { + return s_mp_exptmod(G, X, P, Y, 1); + } +#endif #ifdef BN_MP_DR_IS_MODULUS_C /* is it a DR modulus? */ dr = mp_dr_is_modulus(P); #else + /* default to no */ dr = 0; #endif #ifdef BN_MP_REDUCE_IS_2K_C - /* if not, is it a uDR modulus? */ + /* if not, is it a unrestricted DR modulus? */ if (dr == 0) { dr = mp_reduce_is_2k(P) << 1; } #endif - /* if the modulus is odd or dr != 0 use the fast method */ + /* if the modulus is odd or dr != 0 use the montgomery method */ #ifdef BN_MP_EXPTMOD_FAST_C if (mp_isodd (P) == 1 || dr != 0) { return mp_exptmod_fast (G, X, P, Y, dr); @@ -2483,7 +2474,7 @@ #endif #ifdef BN_S_MP_EXPTMOD_C /* otherwise use the generic Barrett reduction technique */ - return s_mp_exptmod (G, X, P, Y); + return s_mp_exptmod (G, X, P, Y, 0); #else /* no exptmod for evens */ return MP_VAL; @@ -2529,8 +2520,7 @@ #define TAB_SIZE 256 #endif -int -mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) +int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) { mp_int M[TAB_SIZE], res; mp_digit buf, mp; @@ -2588,11 +2578,11 @@ #ifdef BN_MP_MONTGOMERY_SETUP_C /* now setup montgomery */ if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { - goto __M; + goto LBL_M; } #else err = MP_VAL; - goto __M; + goto LBL_M; #endif /* automatically pick the comba one if available (saves quite a few calls/ifs) */ @@ -2608,7 +2598,7 @@ redux = mp_montgomery_reduce; #else err = MP_VAL; - goto __M; + goto LBL_M; #endif } } else if (redmode == 1) { @@ -2618,24 +2608,24 @@ redux = mp_dr_reduce; #else err = MP_VAL; - goto __M; + goto LBL_M; #endif } else { #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) /* setup DR reduction for moduli of the form 2**k - b */ if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { - goto __M; + goto LBL_M; } redux = mp_reduce_2k; #else err = MP_VAL; - goto __M; + goto LBL_M; #endif } /* setup result */ if ((err = mp_init (&res)) != MP_OKAY) { - goto __M; + goto LBL_M; } /* create M table @@ -2649,45 +2639,45 @@ #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C /* now we need R mod m */ if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } #else err = MP_VAL; - goto __RES; + goto LBL_RES; #endif /* now set M[1] to G * R mod m */ if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } else { mp_set(&res, 1); if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { - goto __RES; + goto LBL_RES; } for (x = 0; x < (winsize - 1); x++) { if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } /* create upper table */ for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&M[x], P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } @@ -2727,10 +2717,10 @@ /* if the bit is zero and mode == 1 then we square */ if (mode == 1 && y == 0) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } continue; } @@ -2744,19 +2734,19 @@ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } /* then multiply */ if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } /* empty window and reset */ @@ -2771,10 +2761,10 @@ /* square then multiply if the bit is set */ for (x = 0; x < bitcpy; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } /* get next bit of the window */ @@ -2782,10 +2772,10 @@ if ((bitbuf & (1 << winsize)) != 0) { /* then multiply */ if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } if ((err = redux (&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } } @@ -2799,15 +2789,15 @@ * of R. */ if ((err = redux(&res, P, mp)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } } /* swap res with Y */ mp_exch (&res, Y); err = MP_OKAY; -__RES:mp_clear (&res); -__M: +LBL_RES:mp_clear (&res); +LBL_M: mp_clear(&M[1]); for (x = 1<<(winsize-1); x < (1 << winsize); x++) { mp_clear (&M[x]); @@ -2881,6 +2871,13 @@ if ((err = mp_copy(&t3, &v3)) != MP_OKAY) { goto _ERR; } } + /* make sure U3 >= 0 */ + if (u3.sign == MP_NEG) { + mp_neg(&u1, &u1); + mp_neg(&u2, &u2); + mp_neg(&u3, &u3); + } + /* copy result out */ if (U1 != NULL) { mp_exch(U1, &u1); } if (U2 != NULL) { mp_exch(U2, &u2); } @@ -3059,7 +3056,7 @@ } if ((res = mp_init_copy (&v, b)) != MP_OKAY) { - goto __U; + goto LBL_U; } /* must be positive for the remainder of the algorithm */ @@ -3073,24 +3070,24 @@ if (k > 0) { /* divide the power of two out */ if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) { - goto __V; + goto LBL_V; } if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) { - goto __V; + goto LBL_V; } } /* divide any remaining factors of two out */ if (u_lsb != k) { if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { - goto __V; + goto LBL_V; } } if (v_lsb != k) { if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { - goto __V; + goto LBL_V; } } @@ -3103,23 +3100,23 @@ /* subtract smallest from largest */ if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) { - goto __V; + goto LBL_V; } /* Divide out all factors of two */ if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { - goto __V; + goto LBL_V; } } /* multiply by 2**k which we divided out at the beginning */ if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) { - goto __V; + goto LBL_V; } c->sign = MP_ZPOS; res = MP_OKAY; -__V:mp_clear (&u); -__U:mp_clear (&v); +LBL_V:mp_clear (&u); +LBL_U:mp_clear (&v); return res; } #endif @@ -3555,25 +3552,25 @@ } /* x = a, y = b */ - if ((res = mp_copy (a, &x)) != MP_OKAY) { - goto __ERR; + if ((res = mp_mod(a, b, &x)) != MP_OKAY) { + goto LBL_ERR; } if ((res = mp_copy (b, &y)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* 2. [modified] if x,y are both even then return an error! */ if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) { res = MP_VAL; - goto __ERR; + goto LBL_ERR; } /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ if ((res = mp_copy (&x, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_copy (&y, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } mp_set (&A, 1); mp_set (&D, 1); @@ -3583,24 +3580,24 @@ while (mp_iseven (&u) == 1) { /* 4.1 u = u/2 */ if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* 4.2 if A or B is odd then */ if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) { /* A = (A+y)/2, B = (B-x)/2 */ if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* A = A/2, B = B/2 */ if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -3608,24 +3605,24 @@ while (mp_iseven (&v) == 1) { /* 5.1 v = v/2 */ if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* 5.2 if C or D is odd then */ if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) { /* C = (C+y)/2, D = (D-x)/2 */ if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* C = C/2, D = D/2 */ if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -3633,28 +3630,28 @@ if (mp_cmp (&u, &v) != MP_LT) { /* u = u - v, A = A - C, B = B - D */ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } else { /* v - v - u, C = C - A, D = D - B */ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } @@ -3667,27 +3664,27 @@ /* if v != 1 then there is no inverse */ if (mp_cmp_d (&v, 1) != MP_EQ) { res = MP_VAL; - goto __ERR; + goto LBL_ERR; } /* if its too low */ while (mp_cmp_d(&C, 0) == MP_LT) { if ((res = mp_add(&C, b, &C)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* too big */ while (mp_cmp_mag(&C, b) != MP_LT) { if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } } /* C is now the inverse */ mp_exch (&C, c); res = MP_OKAY; -__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); +LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); return res; } #endif @@ -3856,13 +3853,13 @@ } if ((res = mp_init (&p1)) != MP_OKAY) { - goto __A1; + goto LBL_A1; } /* divide out larger power of two */ k = mp_cnt_lsb(&a1); if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) { - goto __P1; + goto LBL_P1; } /* step 4. if e is even set s=1 */ @@ -3890,18 +3887,18 @@ } else { /* n1 = n mod a1 */ if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) { - goto __P1; + goto LBL_P1; } if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) { - goto __P1; + goto LBL_P1; } *c = s * r; } /* done */ res = MP_OKAY; -__P1:mp_clear (&p1); -__A1:mp_clear (&a1); +LBL_P1:mp_clear (&p1); +LBL_A1:mp_clear (&a1); return res; } #endif @@ -4227,20 +4224,20 @@ /* t1 = get the GCD of the two inputs */ if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) { - goto __T; + goto LBL_T; } /* divide the smallest by the GCD */ if (mp_cmp_mag(a, b) == MP_LT) { /* store quotient in t2 such that t2 * b is the LCM */ if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) { - goto __T; + goto LBL_T; } res = mp_mul(b, &t2, c); } else { /* store quotient in t2 such that t2 * a is the LCM */ if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) { - goto __T; + goto LBL_T; } res = mp_mul(a, &t2, c); } @@ -4248,7 +4245,7 @@ /* fix the sign to positive */ c->sign = MP_ZPOS; -__T: +LBL_T: mp_clear_multi (&t1, &t2, NULL); return res; } @@ -4402,7 +4399,7 @@ } /* if the modulus is larger than the value than return */ - if (b > (int) (a->used * DIGIT_BIT)) { + if (b >= (int) (a->used * DIGIT_BIT)) { res = mp_copy (a, c); return res; } @@ -4484,7 +4481,6 @@ /* how many bits of last digit does b use */ bits = mp_count_bits (b) % DIGIT_BIT; - if (b->used > 1) { if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { return res; @@ -4983,8 +4979,9 @@ u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } - /* store final carry [if any] */ + /* store final carry [if any] and increment ix offset */ *tmpc++ = u; + ++ix; /* now zero digits above the top */ while (ix++ < olduse) { @@ -5085,11 +5082,11 @@ } if ((res = mp_init (&t2)) != MP_OKAY) { - goto __T1; + goto LBL_T1; } if ((res = mp_init (&t3)) != MP_OKAY) { - goto __T2; + goto LBL_T2; } /* if a is negative fudge the sign but keep track */ @@ -5102,52 +5099,52 @@ do { /* t1 = t2 */ if ((res = mp_copy (&t2, &t1)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */ /* t3 = t1**(b-1) */ if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } /* numerator */ /* t2 = t1**b */ if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } /* t2 = t1**b - a */ if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } /* denominator */ /* t3 = t1**(b-1) * b */ if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } /* t3 = (t1**b - a)/(b * t1**(b-1)) */ if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } } while (mp_cmp (&t1, &t2) != MP_EQ); /* result can be off by a few so check */ for (;;) { if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } if (mp_cmp (&t2, a) == MP_GT) { if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) { - goto __T3; + goto LBL_T3; } } else { break; @@ -5165,9 +5162,9 @@ res = MP_OKAY; -__T3:mp_clear (&t3); -__T2:mp_clear (&t2); -__T1:mp_clear (&t1); +LBL_T3:mp_clear (&t3); +LBL_T2:mp_clear (&t2); +LBL_T1:mp_clear (&t1); return res; } #endif @@ -5196,12 +5193,18 @@ int mp_neg (mp_int * a, mp_int * b) { int res; - if ((res = mp_copy (a, b)) != MP_OKAY) { - return res; - } + if (a != b) { + if ((res = mp_copy (a, b)) != MP_OKAY) { + return res; + } + } + if (mp_iszero(b) != MP_YES) { b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; - } + } else { + b->sign = MP_ZPOS; + } + return MP_OKAY; } #endif @@ -5304,7 +5307,7 @@ /* compute t = b**a mod a */ if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) { - goto __T; + goto LBL_T; } /* is it equal to b? */ @@ -5313,7 +5316,7 @@ } err = MP_OKAY; -__T:mp_clear (&t); +LBL_T:mp_clear (&t); return err; } #endif @@ -5352,8 +5355,8 @@ *result = MP_NO; for (ix = 0; ix < PRIME_SIZE; ix++) { - /* what is a mod __prime_tab[ix] */ - if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) { + /* what is a mod LBL_prime_tab[ix] */ + if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) { return err; } @@ -5410,7 +5413,7 @@ /* is the input equal to one of the primes in the table? */ for (ix = 0; ix < PRIME_SIZE; ix++) { - if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) { + if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) { *result = 1; return MP_OKAY; } @@ -5433,20 +5436,20 @@ for (ix = 0; ix < t; ix++) { /* set the prime */ - mp_set (&b, __prime_tab[ix]); + mp_set (&b, ltm_prime_tab[ix]); if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { - goto __B; + goto LBL_B; } if (res == MP_NO) { - goto __B; + goto LBL_B; } } /* passed the test */ *result = MP_YES; -__B:mp_clear (&b); +LBL_B:mp_clear (&b); return err; } #endif @@ -5496,12 +5499,12 @@ return err; } if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { - goto __N1; + goto LBL_N1; } /* set 2**s * r = n1 */ if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { - goto __N1; + goto LBL_N1; } /* count the number of least significant bits @@ -5511,15 +5514,15 @@ /* now divide n - 1 by 2**s */ if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { - goto __R; + goto LBL_R; } /* compute y = b**r mod a */ if ((err = mp_init (&y)) != MP_OKAY) { - goto __R; + goto LBL_R; } if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } /* if y != 1 and y != n1 do */ @@ -5528,12 +5531,12 @@ /* while j <= s-1 and y != n1 */ while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { - goto __Y; + goto LBL_Y; } /* if y == 1 then composite */ if (mp_cmp_d (&y, 1) == MP_EQ) { - goto __Y; + goto LBL_Y; } ++j; @@ -5541,15 +5544,15 @@ /* if y != n1 then composite */ if (mp_cmp (&y, &n1) != MP_EQ) { - goto __Y; + goto LBL_Y; } } /* probably prime now */ *result = MP_YES; -__Y:mp_clear (&y); -__R:mp_clear (&r); -__N1:mp_clear (&n1); +LBL_Y:mp_clear (&y); +LBL_R:mp_clear (&r); +LBL_N1:mp_clear (&n1); return err; } #endif @@ -5594,10 +5597,10 @@ a->sign = MP_ZPOS; /* simple algo if a is less than the largest prime in the table */ - if (mp_cmp_d(a, __prime_tab[PRIME_SIZE-1]) == MP_LT) { + if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) { /* find which prime it is bigger than */ for (x = PRIME_SIZE - 2; x >= 0; x--) { - if (mp_cmp_d(a, __prime_tab[x]) != MP_LT) { + if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) { if (bbs_style == 1) { /* ok we found a prime smaller or * equal [so the next is larger] @@ -5605,17 +5608,17 @@ * however, the prime must be * congruent to 3 mod 4 */ - if ((__prime_tab[x + 1] & 3) != 3) { + if ((ltm_prime_tab[x + 1] & 3) != 3) { /* scan upwards for a prime congruent to 3 mod 4 */ for (y = x + 1; y < PRIME_SIZE; y++) { - if ((__prime_tab[y] & 3) == 3) { - mp_set(a, __prime_tab[y]); + if ((ltm_prime_tab[y] & 3) == 3) { + mp_set(a, ltm_prime_tab[y]); return MP_OKAY; } } } } else { - mp_set(a, __prime_tab[x + 1]); + mp_set(a, ltm_prime_tab[x + 1]); return MP_OKAY; } } @@ -5653,7 +5656,7 @@ /* generate the restable */ for (x = 1; x < PRIME_SIZE; x++) { - if ((err = mp_mod_d(a, __prime_tab[x], res_tab + x)) != MP_OKAY) { + if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) { return err; } } @@ -5679,8 +5682,8 @@ res_tab[x] += kstep; /* subtract the modulus [instead of using division] */ - if (res_tab[x] >= __prime_tab[x]) { - res_tab[x] -= __prime_tab[x]; + if (res_tab[x] >= ltm_prime_tab[x]) { + res_tab[x] -= ltm_prime_tab[x]; } /* set flag if zero */ @@ -5692,7 +5695,7 @@ /* add the step */ if ((err = mp_add_d(a, step, a)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } /* if didn't pass sieve and step == MAX then skip test */ @@ -5702,9 +5705,9 @@ /* is this prime? */ for (x = 0; x < t; x++) { - mp_set(&b, __prime_tab[t]); + mp_set(&b, ltm_prime_tab[t]); if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) { - goto __ERR; + goto LBL_ERR; } if (res == MP_NO) { break; @@ -5717,7 +5720,7 @@ } err = MP_OKAY; -__ERR: +LBL_ERR: mp_clear(&b); return err; } @@ -5828,7 +5831,7 @@ } /* calc the byte size */ - bsize = (size>>3)+(size&7?1:0); + bsize = (size>>3) + ((size&7)?1:0); /* we need a buffer of bsize bytes */ tmp = OPT_CAST(unsigned char) XMALLOC(bsize); @@ -5837,19 +5840,19 @@ } /* calc the maskAND value for the MSbyte*/ - maskAND = 0xFF >> (8 - (size & 7)); + maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7))); /* calc the maskOR_msb */ maskOR_msb = 0; - maskOR_msb_offset = (size - 2) >> 3; + maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0; if (flags & LTM_PRIME_2MSB_ON) { maskOR_msb |= 1 << ((size - 2) & 7); } else if (flags & LTM_PRIME_2MSB_OFF) { maskAND &= ~(1 << ((size - 2) & 7)); - } + } /* get the maskOR_lsb */ - maskOR_lsb = 0; + maskOR_lsb = 1; if (flags & LTM_PRIME_BBS) { maskOR_lsb |= 3; } @@ -5943,22 +5946,29 @@ return MP_VAL; } - /* init a copy of the input */ - if ((res = mp_init_copy (&t, a)) != MP_OKAY) { - return res; + if (mp_iszero(a) == MP_YES) { + *size = 2; + return MP_OKAY; } /* digs is the digit count */ digs = 0; /* if it's negative add one for the sign */ - if (t.sign == MP_NEG) { + if (a->sign == MP_NEG) { ++digs; - t.sign = MP_ZPOS; - } + } + + /* init a copy of the input */ + if ((res = mp_init_copy (&t, a)) != MP_OKAY) { + return res; + } + + /* force temp to positive */ + t.sign = MP_ZPOS; /* fetch out all of the digits */ - while (mp_iszero (&t) == 0) { + while (mp_iszero (&t) == MP_NO) { if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { mp_clear (&t); return res; @@ -6032,14 +6042,14 @@ /* first place a random non-zero digit */ do { - d = ((mp_digit) abs (rand ())); + d = ((mp_digit) abs (rand ())) & MP_MASK; } while (d == 0); if ((res = mp_add_d (a, d, a)) != MP_OKAY) { return res; } - while (digits-- > 0) { + while (--digits > 0) { if ((res = mp_lshd (a, 1)) != MP_OKAY) { return res; } @@ -6074,7 +6084,7 @@ */ /* read a string [ASCII] in a given radix */ -int mp_read_radix (mp_int * a, char *str, int radix) +int mp_read_radix (mp_int * a, const char *str, int radix) { int y, res, neg; char ch; @@ -6257,8 +6267,7 @@ * precomputed via mp_reduce_setup. * From HAC pp.604 Algorithm 14.42 */ -int -mp_reduce (mp_int * x, mp_int * m, mp_int * mu) +int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) { mp_int q; int res, um = m->used; @@ -6278,11 +6287,11 @@ } } else { #ifdef BN_S_MP_MUL_HIGH_DIGS_C - if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) { + if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) - if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) { + if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { goto CLEANUP; } #else @@ -6355,8 +6364,7 @@ */ /* reduces a modulo n where n is of the form 2**p - d */ -int -mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) +int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) { mp_int q; int p, res; @@ -6398,6 +6406,68 @@ /* End: bn_mp_reduce_2k.c */ +/* Start: bn_mp_reduce_2k_l.c */ +#include <tommath.h> +#ifdef BN_MP_REDUCE_2K_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, [email protected], http://math.libtomcrypt.org + */ + +/* reduces a modulo n where n is of the form 2**p - d + This differs from reduce_2k since "d" can be larger + than a single digit. +*/ +int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) +{ + mp_int q; + int p, res; + + if ((res = mp_init(&q)) != MP_OKAY) { + return res; + } + + p = mp_count_bits(n); +top: + /* q = a/2**p, a = a mod 2**p */ + if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { + goto ERR; + } + + /* q = q * d */ + if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { + goto ERR; + } + + /* a = a + q */ + if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { + goto ERR; + } + + if (mp_cmp_mag(a, n) != MP_LT) { + s_mp_sub(a, n, a); + goto top; + } + +ERR: + mp_clear(&q); + return res; +} + +#endif + +/* End: bn_mp_reduce_2k_l.c */ + /* Start: bn_mp_reduce_2k_setup.c */ #include <tommath.h> #ifdef BN_MP_REDUCE_2K_SETUP_C @@ -6417,8 +6487,7 @@ */ /* determines the setup value */ -int -mp_reduce_2k_setup(mp_int *a, mp_digit *d) +int mp_reduce_2k_setup(mp_int *a, mp_digit *d) { int res, p; mp_int tmp; @@ -6446,6 +6515,50 @@ /* End: bn_mp_reduce_2k_setup.c */ +/* Start: bn_mp_reduce_2k_setup_l.c */ +#include <tommath.h> +#ifdef BN_MP_REDUCE_2K_SETUP_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, [email protected], http://math.libtomcrypt.org + */ + +/* determines the setup value */ +int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) +{ + int res; + mp_int tmp; + + if ((res = mp_init(&tmp)) != MP_OKAY) { + return res; + } + + if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { + goto ERR; + } + + if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { + goto ERR; + } + +ERR: + mp_clear(&tmp); + return res; +} +#endif + +/* End: bn_mp_reduce_2k_setup_l.c */ + /* Start: bn_mp_reduce_is_2k.c */ #include <tommath.h> #ifdef BN_MP_REDUCE_IS_2K_C @@ -6471,9 +6584,9 @@ mp_digit iz; if (a->used == 0) { - return 0; + return MP_NO; } else if (a->used == 1) { - return 1; + return MP_YES; } else if (a->used > 1) { iy = mp_count_bits(a); iz = 1; @@ -6482,7 +6595,7 @@ /* Test every bit from the second digit up, must be 1 */ for (ix = DIGIT_BIT; ix < iy; ix++) { if ((a->dp[iw] & iz) == 0) { - return 0; + return MP_NO; } iz <<= 1; if (iz > (mp_digit)MP_MASK) { @@ -6491,13 +6604,57 @@ } } } - return 1; + return MP_YES; } #endif /* End: bn_mp_reduce_is_2k.c */ +/* Start: bn_mp_reduce_is_2k_l.c */ +#include <tommath.h> +#ifdef BN_MP_REDUCE_IS_2K_L_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, [email protected], http://math.libtomcrypt.org + */ + +/* determines if reduce_2k_l can be used */ +int mp_reduce_is_2k_l(mp_int *a) +{ + int ix, iy; + + if (a->used == 0) { + return MP_NO; + } else if (a->used == 1) { + return MP_YES; + } else if (a->used > 1) { + /* if more than half of the digits are -1 we're sold */ + for (iy = ix = 0; ix < a->used; ix++) { + if (a->dp[ix] == MP_MASK) { + ++iy; + } + } + return (iy >= (a->used/2)) ? MP_YES : MP_NO; + + } + return MP_NO; +} + +#endif + +/* End: bn_mp_reduce_is_2k_l.c */ + /* Start: bn_mp_reduce_setup.c */ #include <tommath.h> #ifdef BN_MP_REDUCE_SETUP_C @@ -7132,8 +7289,7 @@ */ /* store in signed [big endian] format */ -int -mp_to_signed_bin (mp_int * a, unsigned char *b) +int mp_to_signed_bin (mp_int * a, unsigned char *b) { int res; @@ -7147,6 +7303,37 @@ /* End: bn_mp_to_signed_bin.c */ +/* Start: bn_mp_to_signed_bin_n.c */ +#include <tommath.h> +#ifdef BN_MP_TO_SIGNED_BIN_N_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, [email protected], http://math.libtomcrypt.org + */ + +/* store in signed [big endian] format */ +int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) +{ + if (*outlen < (unsigned long)mp_signed_bin_size(a)) { + return MP_VAL; + } + *outlen = mp_signed_bin_size(a); + return mp_to_signed_bin(a, b); +} +#endif + +/* End: bn_mp_to_signed_bin_n.c */ + /* Start: bn_mp_to_unsigned_bin.c */ #include <tommath.h> #ifdef BN_MP_TO_UNSIGNED_BIN_C @@ -7166,8 +7353,7 @@ */ /* store in unsigned [big endian] format */ -int -mp_to_unsigned_bin (mp_int * a, unsigned char *b) +int mp_to_unsigned_bin (mp_int * a, unsigned char *b) { int x, res; mp_int t; @@ -7196,6 +7382,37 @@ /* End: bn_mp_to_unsigned_bin.c */ +/* Start: bn_mp_to_unsigned_bin_n.c */ +#include <tommath.h> +#ifdef BN_MP_TO_UNSIGNED_BIN_N_C +/* LibTomMath, multiple-precision integer library -- Tom St Denis + * + * LibTomMath is a library that provides multiple-precision + * integer arithmetic as well as number theoretic functionality. + * + * The library was designed directly after the MPI library by + * Michael Fromberger but has been written from scratch with + * additional optimizations in place. + * + * The library is free for all purposes without any express + * guarantee it works. + * + * Tom St Denis, [email protected], http://math.libtomcrypt.org + */ + +/* store in unsigned [big endian] format */ +int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) +{ + if (*outlen < (unsigned long)mp_unsigned_bin_size(a)) { + return MP_VAL; + } + *outlen = mp_unsigned_bin_size(a); + return mp_to_unsigned_bin(a, b); +} +#endif + +/* End: bn_mp_to_unsigned_bin_n.c */ + /* Start: bn_mp_toom_mul.c */ #include <tommath.h> #ifdef BN_MP_TOOM_MUL_C @@ -7216,9 +7433,10 @@ /* multiplication using the Toom-Cook 3-way algorithm * - * Much more complicated than Karatsuba but has a lower asymptotic running time of - * O(N**1.464). This algorithm is only particularly useful on VERY large - * inputs (we're talking 1000s of digits here...). + * Much more complicated than Karatsuba but has a lower + * asymptotic running time of O(N**1.464). This algorithm is + * only particularly useful on VERY large inputs + * (we're talking 1000s of digits here...). */ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) { @@ -7888,8 +8106,7 @@ */ /* get the size for an unsigned equivalent */ -int -mp_unsigned_bin_size (mp_int * a) +int mp_unsigned_bin_size (mp_int * a) { int size = mp_count_bits (a); return (size / 8 + ((size & 7) != 0 ? 1 : 0)); @@ -7938,7 +8155,7 @@ } for (ix = 0; ix < px; ix++) { - + t.dp[ix] ^= x->dp[ix]; } mp_clamp (&t); mp_exch (c, &t); @@ -7968,12 +8185,18 @@ */ /* set to zero */ -void -mp_zero (mp_int * a) -{ +void mp_zero (mp_int * a) +{ + int n; + mp_digit *tmp; + a->sign = MP_ZPOS; a->used = 0; - memset (a->dp, 0, sizeof (mp_digit) * a->alloc); + + tmp = a->dp; + for (n = 0; n < a->alloc; n++) { + *tmp++ = 0; + } } #endif @@ -7996,7 +8219,7 @@ * * Tom St Denis, [email protected], http://math.libtomcrypt.org */ -const mp_digit __prime_tab[] = { +const mp_digit ltm_prime_tab[] = { 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, @@ -8212,11 +8435,12 @@ #define TAB_SIZE 256 #endif -int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) +int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) { mp_int M[TAB_SIZE], res, mu; mp_digit buf; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; + int (*redux)(mp_int*,mp_int*,mp_int*); /* find window size */ x = mp_count_bits (X); @@ -8261,11 +8485,20 @@ /* create mu, used for Barrett reduction */ if ((err = mp_init (&mu)) != MP_OKAY) { - goto __M; - } - if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { - goto __MU; - } + goto LBL_M; + } + + if (redmode == 0) { + if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce; + } else { + if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { + goto LBL_MU; + } + redux = mp_reduce_2k_l; + } /* create M table * @@ -8276,23 +8509,26 @@ * computed though accept for M[0] and M[1] */ if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { - goto __MU; + goto LBL_MU; } /* compute the value at M[1<<(winsize-1)] by squaring * M[1] (winsize-1) times */ if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { - goto __MU; + goto LBL_MU; } for (x = 0; x < (winsize - 1); x++) { + /* square it */ if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { - goto __MU; - } - if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { - goto __MU; + goto LBL_MU; + } + + /* reduce modulo P */ + if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { + goto LBL_MU; } } @@ -8301,16 +8537,16 @@ */ for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { - goto __MU; - } - if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) { - goto __MU; + goto LBL_MU; + } + if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { + goto LBL_MU; } } /* setup result */ if ((err = mp_init (&res)) != MP_OKAY) { - goto __MU; + goto LBL_MU; } mp_set (&res, 1); @@ -8350,10 +8586,10 @@ /* if the bit is zero and mode == 1 then we square */ if (mode == 1 && y == 0) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } - if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { - goto __RES; + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; } continue; } @@ -8367,19 +8603,19 @@ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } - if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { - goto __RES; + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; } } /* then multiply */ if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } - if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { - goto __RES; + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; } /* empty window and reset */ @@ -8394,20 +8630,20 @@ /* square then multiply if the bit is set */ for (x = 0; x < bitcpy; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } - if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { - goto __RES; + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; } bitbuf <<= 1; if ((bitbuf & (1 << winsize)) != 0) { /* then multiply */ if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { - goto __RES; + goto LBL_RES; } - if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { - goto __RES; + if ((err = redux (&res, P, &mu)) != MP_OKAY) { + goto LBL_RES; } } } @@ -8415,9 +8651,9 @@ mp_exch (&res, Y); err = MP_OKAY; -__RES:mp_clear (&res); -__MU:mp_clear (&mu); -__M: +LBL_RES:mp_clear (&res); +LBL_MU:mp_clear (&mu); +LBL_M: mp_clear(&M[1]); for (x = 1<<(winsize-1); x < (1 << winsize); x++) { mp_clear (&M[x]); @@ -8450,8 +8686,7 @@ * HAC pp. 595, Algorithm 14.12 Modified so you can control how * many digits of output are created. */ -int -s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) +int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) { mp_int t; int res, pa, pb, ix, iy; @@ -8619,8 +8854,7 @@ */ /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ -int -s_mp_sqr (mp_int * a, mp_int * b) +int s_mp_sqr (mp_int * a, mp_int * b) { mp_int t; int res, ix, iy, pa; @@ -8797,11 +9031,12 @@ CPU /Compiler /MUL CUTOFF/SQR CUTOFF ------------------------------------------------------------- Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-) + AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34 */ -int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */ - KARATSUBA_SQR_CUTOFF = 128, /* Min. number of digits before Karatsuba squaring is used. */ +int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */ + KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_SQR_CUTOFF = 400;