dropbear: pre_gen/mpi.c comparison

comparison pre_gen/mpi.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35

Initial import of libtommath 0.35

author	Matt Johnston <matt@ucc.asn.au>
date	Fri, 06 May 2005 08:59:30 +0000
parents	d29b64170cf0
children

comparison

equal deleted inserted replaced

-:d29b64170cf0
+:d8254fc979e9
 * that is c = 1/a mod b
 *
 * Based on slow invmod except this is optimized for the case where b is
 * odd as per HAC Note 14.64 on pp. 610
 */
-int
+int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
-fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
 {
 mp_int  x, y, u, v, B, D;
 int     res, neg;
 /* 2. [modified] b must be odd   */
 return res;
 }
 /* x == modulus, y == value to invert */
 if ((res = mp_copy (b, &x)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* we need y = |a| */
-if ((res = mp_abs (a, &y)) != MP_OKAY) {
+if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 mp_set (&D, 1);
 top:
 /* 4.  while u is even do */
 while (mp_iseven (&u) == 1) {
 /* 4.1 u = u/2 */
 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 4.2 if B is odd then */
 if (mp_isodd (&B) == 1) {
 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* B = B/2 */
 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* 5.  while v is even do */
 while (mp_iseven (&v) == 1) {
 /* 5.1 v = v/2 */
 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 5.2 if D is odd then */
 if (mp_isodd (&D) == 1) {
 /* D = (D-x)/2 */
 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* D = D/2 */
 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* 6.  if u >= v then */
 if (mp_cmp (&u, &v) != MP_LT) {
 /* u = u - v, B = B - D */
 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 } else {
 /* v = v - u, D = D - B */
 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* if not zero goto step 4 */
 if (mp_iszero (&u) == 0) {
 /* now a = C, b = D, gcd == g*v */
 /* if v != 1 then there is no inverse */
 if (mp_cmp_d (&v, 1) != MP_EQ) {
 res = MP_VAL;
-goto __ERR;
+goto LBL_ERR;
 }
 /* b is now the inverse */
 neg = a->sign;
 while (D.sign == MP_NEG) {
 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 mp_exch (&D, c);
 c->sign = neg;
 res = MP_OKAY;
-__ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
 return res;
 }
 #endif
 /* End: bn_fast_mp_invmod.c */
 * which uses the comba method to quickly calculate the columns of the
 * reduction.
 *
 * Based on Algorithm 14.32 on pp.601 of HAC.
 */
-int
+int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
-fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 {
 int     ix, res, olduse;
 mp_word W[MP_WARRAY];
 /* get old used count */
 * required for fast Barrett reduction).
 *
 * Based on Algorithm 14.12 on pp.595 of HAC.
 *
 */
-int
+int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
-fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
 int     olduse, res, pa, ix, iz;
 mp_digit W[MP_WARRAY];
 register mp_word  _W;
 /* number of output digits to produce */
 pa = MIN(digs, a->used + b->used);
 /* clear the carry */
 _W = 0;
-for (ix = 0; ix <= pa; ix++) {
+for (ix = 0; ix < pa; ix++) {
 int      tx, ty;
 int      iy;
 mp_digit *tmpx, *tmpy;
 /* get offsets into the two bignums */
 /* setup temp aliases */
 tmpx = a->dp + tx;
 tmpy = b->dp + ty;
-/* this is the number of times the loop will iterrate, essentially its
+/* this is the number of times the loop will iterate, essentially
 while (tx++ < a->used && ty-- >= 0) { ... }
 */
 iy = MIN(a->used-tx, ty+1);
 /* execute loop */
 /* make next carry */
 _W = _W >> ((mp_word)DIGIT_BIT);
 }
+/* store final carry */
+W[ix] = (mp_digit)(_W & MP_MASK);
 /* setup dest */
 olduse  = c->used;
-c->used = digs;
+c->used = pa;
 {
 register mp_digit *tmpc;
 tmpc = c->dp;
-for (ix = 0; ix < digs; ix++) {
+for (ix = 0; ix < pa+1; ix++) {
 /* now extract the previous digit [below the carry] */
 *tmpc++ = W[ix];
 }
 /* clear unused digits [that existed in the old copy of c] */
 * This is used in the Barrett reduction since for one of the multiplications
 * only the higher digits were needed.  This essentially halves the work.
 *
 * Based on Algorithm 14.12 on pp.595 of HAC.
 */
-int
+int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
-fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
 int     olduse, res, pa, ix, iz;
 mp_digit W[MP_WARRAY];
 mp_word  _W;
 }
 /* number of output digits to produce */
 pa = a->used + b->used;
 _W = 0;
-for (ix = digs; ix <= pa; ix++) {
+for (ix = digs; ix < pa; ix++) {
 int      tx, ty, iy;
 mp_digit *tmpx, *tmpy;
 /* get offsets into the two bignums */
 ty = MIN(b->used-1, ix);
 W[ix] = ((mp_digit)_W) & MP_MASK;
 /* make next carry */
 _W = _W >> ((mp_word)DIGIT_BIT);
 }
+/* store final carry */
+W[ix] = (mp_digit)(_W & MP_MASK);
 /* setup dest */
 olduse  = c->used;
 c->used = pa;
 * guarantee it works.
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
-/* fast squaring
-*
-* This is the comba method where the columns of the product
-* are computed first then the carries are computed.  This
-* has the effect of making a very simple inner loop that
-* is executed the most
-*
-* W2 represents the outer products and W the inner.
-*
-* A further optimizations is made because the inner
-* products are of the form "A * B * 2".  The *2 part does
-* not need to be computed until the end which is good
-* because 64-bit shifts are slow!
-*
-* Based on Algorithm 14.16 on pp.597 of HAC.
-*
-*/
 /* the gist of squaring...
+* you do like mult except the offset of the tmpx [one that
-you do like mult except the offset of the tmpx [one that starts closer to zero]
+* starts closer to zero] can't equal the offset of tmpy.
-can't equal the offset of tmpy.  So basically you set up iy like before then you min it with
+* So basically you set up iy like before then you min it with
-(ty-tx) so that it never happens.  You double all those you add in the inner loop
+* (ty-tx) so that it never happens.  You double all those
+* you add in the inner loop
 After that loop you do the squares and add them in.
-Remove W2 and don't memset W
 */
 int fast_s_mp_sqr (mp_int * a, mp_int * b)
 {
 int       olduse, res, pa, ix, iz;
 }
 }
 /* number of output digits to produce */
 W1 = 0;
-for (ix = 0; ix <= pa; ix++) {
+for (ix = 0; ix < pa; ix++) {
 int      tx, ty, iy;
 mp_word  _W;
 mp_digit *tmpy;
 /* clear counter */
 /* setup temp aliases */
 tmpx = a->dp + tx;
 tmpy = a->dp + ty;
-/* this is the number of times the loop will iterrate, essentially its
+/* this is the number of times the loop will iterate, essentially
 while (tx++ < a->used && ty-- >= 0) { ... }
 */
 iy = MIN(a->used-tx, ty+1);
 /* now for squaring tx can never equal ty
 if ((ix&1) == 0) {
 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
 }
 /* store it */
-W[ix] = _W;
+W[ix] = (mp_digit)(_W & MP_MASK);
 /* make next carry */
 W1 = _W >> ((mp_word)DIGIT_BIT);
 }
 }
 mp_set(&tq, 1);
 n = mp_count_bits(a) - mp_count_bits(b);
-if (((res = mp_copy(a, &ta)) != MP_OKAY) ||
+if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
-((res = mp_copy(b, &tb)) != MP_OKAY) ||
+((res = mp_abs(b, &tb)) != MP_OKAY) ||
 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
-goto __ERR;
+goto LBL_ERR;
 }
 while (n-- >= 0) {
 if (mp_cmp(&tb, &ta) != MP_GT) {
 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* now q == quotient and ta == remainder */
 n  = a->sign;
 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
 if (c != NULL) {
 mp_exch(c, &q);
-c->sign  = n2;
+c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
 }
 if (d != NULL) {
 mp_exch(d, &ta);
-d->sign = n;
+d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
 }
-__ERR:
+LBL_ERR:
 mp_clear_multi(&ta, &tb, &tq, &q, NULL);
 return res;
 }
 #else
 return res;
 }
 q.used = a->used + 2;
 if ((res = mp_init (&t1)) != MP_OKAY) {
-goto __Q;
+goto LBL_Q;
 }
 if ((res = mp_init (&t2)) != MP_OKAY) {
-goto __T1;
+goto LBL_T1;
 }
 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
-goto __T2;
+goto LBL_T2;
 }
 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
-goto __X;
+goto LBL_X;
 }
 /* fix the sign */
 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
 x.sign = y.sign = MP_ZPOS;
 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
 norm = mp_count_bits(&y) % DIGIT_BIT;
 if (norm < (int)(DIGIT_BIT-1)) {
 norm = (DIGIT_BIT-1) - norm;
 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 } else {
 norm = 0;
 }
 n = x.used - 1;
 t = y.used - 1;
 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
-goto __Y;
+goto LBL_Y;
 }
 while (mp_cmp (&x, &y) != MP_LT) {
 ++(q.dp[n - t]);
 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 }
 /* reset y by shifting it back down */
 mp_rshd (&y, n - t);
 mp_zero (&t1);
 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
 t1.dp[1] = y.dp[t];
 t1.used = 2;
 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 /* find right hand */
 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
 t2.used = 3;
 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
 if (x.sign == MP_NEG) {
 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
 }
 }
 mp_exch (&x, d);
 }
 res = MP_OKAY;
-__Y:mp_clear (&y);
+LBL_Y:mp_clear (&y);
-__X:mp_clear (&x);
+LBL_X:mp_clear (&x);
-__T2:mp_clear (&t2);
+LBL_T2:mp_clear (&t2);
-__T1:mp_clear (&t1);
+LBL_T1:mp_clear (&t1);
-__Q:mp_clear (&q);
+LBL_Q:mp_clear (&q);
 return res;
 }
 #endif
 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
 *
 * Based on algorithm from the paper
 *
 * "Generating Efficient Primes for Discrete Log Cryptosystems"
-*                 Chae Hoon Lim, Pil Loong Lee,
+*                 Chae Hoon Lim, Pil Joong Lee,
 *          POSTECH Information Research Laboratories
 *
 * The modulus must be of a special format [see manual]
 *
 * Has been modified to use algorithm 7.10 from the LTM book instead
 err = mp_exptmod(&tmpG, &tmpX, P, Y);
 mp_clear_multi(&tmpG, &tmpX, NULL);
 return err;
 #else
 /* no invmod */
-return MP_VAL
+return MP_VAL;
 #endif
 }
+/* modified diminished radix reduction */
+#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C)
+if (mp_reduce_is_2k_l(P) == MP_YES) {
+return s_mp_exptmod(G, X, P, Y, 1);
+}
+#endif
 #ifdef BN_MP_DR_IS_MODULUS_C
 /* is it a DR modulus? */
 dr = mp_dr_is_modulus(P);
 #else
+/* default to no */
 dr = 0;
 #endif
 #ifdef BN_MP_REDUCE_IS_2K_C
-/* if not, is it a uDR modulus? */
+/* if not, is it an unrestricted DR modulus? */
 if (dr == 0) {
 dr = mp_reduce_is_2k(P) << 1;
 }
 #endif
-/* if the modulus is odd or dr != 0 use the fast method */
+/* if the modulus is odd or dr != 0 use the montgomery method */
 #ifdef BN_MP_EXPTMOD_FAST_C
 if (mp_isodd (P) == 1 || dr !=  0) {
 return mp_exptmod_fast (G, X, P, Y, dr);
 } else {
 #endif
 #ifdef BN_S_MP_EXPTMOD_C
 /* otherwise use the generic Barrett reduction technique */
-return s_mp_exptmod (G, X, P, Y);
+return s_mp_exptmod (G, X, P, Y, 0);
 #else
 /* no exptmod for evens */
 return MP_VAL;
 #endif
 #ifdef BN_MP_EXPTMOD_FAST_C
 #define TAB_SIZE 32
 #else
 #define TAB_SIZE 256
 #endif
-int
+int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
-mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
 {
 mp_int  M[TAB_SIZE], res;
 mp_digit buf, mp;
 int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
 /* determine and setup reduction code */
 if (redmode == 0) {
 #ifdef BN_MP_MONTGOMERY_SETUP_C
 /* now setup montgomery  */
 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
-goto __M;
+goto LBL_M;
 }
 #else
 err = MP_VAL;
-goto __M;
+goto LBL_M;
 #endif
 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
 if (((P->used * 2 + 1) < MP_WARRAY) &&
 #ifdef BN_MP_MONTGOMERY_REDUCE_C
 /* use slower baseline Montgomery method */
 redux = mp_montgomery_reduce;
 #else
 err = MP_VAL;
-goto __M;
+goto LBL_M;
 #endif
 }
 } else if (redmode == 1) {
 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
 /* setup DR reduction for moduli of the form B**k - b */
 mp_dr_setup(P, &mp);
 redux = mp_dr_reduce;
 #else
 err = MP_VAL;
-goto __M;
+goto LBL_M;
 #endif
 } else {
 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
 /* setup DR reduction for moduli of the form 2**k - b */
 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
-goto __M;
+goto LBL_M;
 }
 redux = mp_reduce_2k;
 #else
 err = MP_VAL;
-goto __M;
+goto LBL_M;
 #endif
 }
 /* setup result */
 if ((err = mp_init (&res)) != MP_OKAY) {
-goto __M;
+goto LBL_M;
 }
 /* create M table
 *
 if (redmode == 0) {
 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
 /* now we need R mod m */
 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 #else
 err = MP_VAL;
-goto __RES;
+goto LBL_RES;
 #endif
 /* now set M[1] to G * R mod m */
 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 } else {
 mp_set(&res, 1);
 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 for (x = 0; x < (winsize - 1); x++) {
 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* create upper table */
 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* set initial mode and bit cnt */
 mode   = 0;
 }
 /* if the bit is zero and mode == 1 then we square */
 if (mode == 1 && y == 0) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 continue;
 }
 /* else we add it to the window */
 if (bitcpy == winsize) {
 /* ok window is filled so square as required and multiply  */
 /* square first */
 for (x = 0; x < winsize; x++) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* then multiply */
 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 /* empty window and reset */
 bitcpy = 0;
 bitbuf = 0;
 /* if bits remain then square/multiply */
 if (mode == 2 && bitcpy > 0) {
 /* square then multiply if the bit is set */
 for (x = 0; x < bitcpy; x++) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 /* get next bit of the window */
 bitbuf <<= 1;
 if ((bitbuf & (1 << winsize)) != 0) {
 /* then multiply */
 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 if ((err = redux (&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 }
 }
 * actually multiplied by R mod n.  So we have
 * to reduce one more time to cancel out the factor
 * of R.
 */
 if ((err = redux(&res, P, mp)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* swap res with Y */
 mp_exch (&res, Y);
 err = MP_OKAY;
-__RES:mp_clear (&res);
+LBL_RES:mp_clear (&res);
-__M:
+LBL_M:
 mp_clear(&M[1]);
 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
 mp_clear (&M[x]);
 }
 return err;
 /* (v1,v2,v3) = (t1,t2,t3) */
 if ((err = mp_copy(&t1, &v1)) != MP_OKAY)                                  { goto _ERR; }
 if ((err = mp_copy(&t2, &v2)) != MP_OKAY)                                  { goto _ERR; }
 if ((err = mp_copy(&t3, &v3)) != MP_OKAY)                                  { goto _ERR; }
+}
+/* make sure U3 >= 0 */
+if (u3.sign == MP_NEG) {
+mp_neg(&u1, &u1);
+mp_neg(&u2, &u2);
+mp_neg(&u3, &u3);
 }
 /* copy result out */
 if (U1 != NULL) { mp_exch(U1, &u1); }
 if (U2 != NULL) { mp_exch(U2, &u2); }
 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
 return res;
 }
 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
-goto __U;
+goto LBL_U;
 }
 /* must be positive for the remainder of the algorithm */
 u.sign = v.sign = MP_ZPOS;
 k     = MIN(u_lsb, v_lsb);
 if (k > 0) {
 /* divide the power of two out */
 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 }
 /* divide any remaining factors of two out */
 if (u_lsb != k) {
 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 }
 if (v_lsb != k) {
 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 }
 while (mp_iszero(&v) == 0) {
 /* make sure v is the largest */
 mp_exch(&u, &v);
 }
 /* subtract smallest from largest */
 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 /* Divide out all factors of two */
 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 }
 /* multiply by 2**k which we divided out at the beginning */
 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
-goto __V;
+goto LBL_V;
 }
 c->sign = MP_ZPOS;
 res = MP_OKAY;
-__V:mp_clear (&u);
+LBL_V:mp_clear (&u);
-__U:mp_clear (&v);
+LBL_U:mp_clear (&v);
 return res;
 }
 #endif
 /* End: bn_mp_gcd.c */
 &A, &B, &C, &D, NULL)) != MP_OKAY) {
 return res;
 }
 /* x = a, y = b */
-if ((res = mp_copy (a, &x)) != MP_OKAY) {
+if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_copy (b, &y)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 2. [modified] if x,y are both even then return an error! */
 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
 res = MP_VAL;
-goto __ERR;
+goto LBL_ERR;
 }
 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 mp_set (&A, 1);
 mp_set (&D, 1);
 top:
 /* 4.  while u is even do */
 while (mp_iseven (&u) == 1) {
 /* 4.1 u = u/2 */
 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 4.2 if A or B is odd then */
 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
 /* A = (A+y)/2, B = (B-x)/2 */
 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* A = A/2, B = B/2 */
 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* 5.  while v is even do */
 while (mp_iseven (&v) == 1) {
 /* 5.1 v = v/2 */
 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* 5.2 if C or D is odd then */
 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
 /* C = (C+y)/2, D = (D-x)/2 */
 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* C = C/2, D = D/2 */
 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* 6.  if u >= v then */
 if (mp_cmp (&u, &v) != MP_LT) {
 /* u = u - v, A = A - C, B = B - D */
 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 } else {
 /* v = v - u, C = C - A, D = D - B */
 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* if not zero goto step 4 */
 if (mp_iszero (&u) == 0)
 /* now a = C, b = D, gcd == g*v */
 /* if v != 1 then there is no inverse */
 if (mp_cmp_d (&v, 1) != MP_EQ) {
 res = MP_VAL;
-goto __ERR;
+goto LBL_ERR;
 }
 /* if its too low */
 while (mp_cmp_d(&C, 0) == MP_LT) {
 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* too big */
 while (mp_cmp_mag(&C, b) != MP_LT) {
 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 }
 /* C is now the inverse */
 mp_exch (&C, c);
 res = MP_OKAY;
-__ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
+LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
 return res;
 }
 #endif
 /* End: bn_mp_invmod_slow.c */
 if ((res = mp_init_copy (&a1, a)) != MP_OKAY) {
 return res;
 }
 if ((res = mp_init (&p1)) != MP_OKAY) {
-goto __A1;
+goto LBL_A1;
 }
 /* divide out larger power of two */
 k = mp_cnt_lsb(&a1);
 if ((res = mp_div_2d(&a1, k, &a1, NULL)) != MP_OKAY) {
-goto __P1;
+goto LBL_P1;
 }
 /* step 4.  if e is even set s=1 */
 if ((k & 1) == 0) {
 s = 1;
 if (mp_cmp_d (&a1, 1) == MP_EQ) {
 *c = s;
 } else {
 /* n1 = n mod a1 */
 if ((res = mp_mod (p, &a1, &p1)) != MP_OKAY) {
-goto __P1;
+goto LBL_P1;
 }
 if ((res = mp_jacobi (&p1, &a1, &r)) != MP_OKAY) {
-goto __P1;
+goto LBL_P1;
 }
 *c = s * r;
 }
 /* done */
 res = MP_OKAY;
-__P1:mp_clear (&p1);
+LBL_P1:mp_clear (&p1);
-__A1:mp_clear (&a1);
+LBL_A1:mp_clear (&a1);
 return res;
 }
 #endif
 /* End: bn_mp_jacobi.c */
 return res;
 }
 /* t1 = get the GCD of the two inputs */
 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
-goto __T;
+goto LBL_T;
 }
 /* divide the smallest by the GCD */
 if (mp_cmp_mag(a, b) == MP_LT) {
 /* store quotient in t2 such that t2 * b is the LCM */
 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
-goto __T;
+goto LBL_T;
 }
 res = mp_mul(b, &t2, c);
 } else {
 /* store quotient in t2 such that t2 * a is the LCM */
 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
-goto __T;
+goto LBL_T;
 }
 res = mp_mul(a, &t2, c);
 }
 /* fix the sign to positive */
 c->sign = MP_ZPOS;
-__T:
+LBL_T:
 mp_clear_multi (&t1, &t2, NULL);
 return res;
 }
 #endif
 mp_zero (c);
 return MP_OKAY;
 }
 /* if the modulus is larger than the value then return */
-if (b > (int) (a->used * DIGIT_BIT)) {
+if (b >= (int) (a->used * DIGIT_BIT)) {
 res = mp_copy (a, c);
 return res;
 }
 /* copy */
 {
 int     x, bits, res;
 /* how many bits of last digit does b use */
 bits = mp_count_bits (b) % DIGIT_BIT;
 if (b->used > 1) {
 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
 return res;
 }
 /* send carry into next iteration */
 u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
 }
-/* store final carry [if any] */
+/* store final carry [if any] and increment ix offset  */
 *tmpc++ = u;
+++ix;
 /* now zero digits above the top */
 while (ix++ < olduse) {
 *tmpc++ = 0;
 }
 if ((res = mp_init (&t1)) != MP_OKAY) {
 return res;
 }
 if ((res = mp_init (&t2)) != MP_OKAY) {
-goto __T1;
+goto LBL_T1;
 }
 if ((res = mp_init (&t3)) != MP_OKAY) {
-goto __T2;
+goto LBL_T2;
 }
 /* if a is negative fudge the sign but keep track */
 neg     = a->sign;
 a->sign = MP_ZPOS;
 mp_set (&t2, 2);
 do {
 /* t1 = t2 */
 if ((res = mp_copy (&t2, &t1)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
 /* t3 = t1**(b-1) */
 if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 /* numerator */
 /* t2 = t1**b */
 if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 /* t2 = t1**b - a */
 if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 /* denominator */
 /* t3 = t1**(b-1) * b  */
 if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 /* t3 = (t1**b - a)/(b * t1**(b-1)) */
 if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 if ((res = mp_sub (&t1, &t3, &t2)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 }  while (mp_cmp (&t1, &t2) != MP_EQ);
 /* result can be off by a few so check */
 for (;;) {
 if ((res = mp_expt_d (&t1, b, &t2)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 if (mp_cmp (&t2, a) == MP_GT) {
 if ((res = mp_sub_d (&t1, 1, &t1)) != MP_OKAY) {
-goto __T3;
+goto LBL_T3;
 }
 } else {
 break;
 }
 }
 /* set the sign of the result */
 c->sign = neg;
 res = MP_OKAY;
-__T3:mp_clear (&t3);
+LBL_T3:mp_clear (&t3);
-__T2:mp_clear (&t2);
+LBL_T2:mp_clear (&t2);
-__T1:mp_clear (&t1);
+LBL_T1:mp_clear (&t1);
 return res;
 }
 #endif
 /* End: bn_mp_n_root.c */
 /* b = -a */
 int mp_neg (mp_int * a, mp_int * b)
 {
 int     res;
-if ((res = mp_copy (a, b)) != MP_OKAY) {
+if (a != b) {
-return res;
+if ((res = mp_copy (a, b)) != MP_OKAY) {
-}
+return res;
+}
+}
 if (mp_iszero(b) != MP_YES) {
 b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS;
-}
+} else {
+b->sign = MP_ZPOS;
+}
 return MP_OKAY;
 }
 #endif
 /* End: bn_mp_neg.c */
 return err;
 }
 /* compute t = b**a mod a */
 if ((err = mp_exptmod (b, a, a, &t)) != MP_OKAY) {
-goto __T;
+goto LBL_T;
 }
 /* is it equal to b? */
 if (mp_cmp (&t, b) == MP_EQ) {
 *result = MP_YES;
 }
 err = MP_OKAY;
-__T:mp_clear (&t);
+LBL_T:mp_clear (&t);
 return err;
 }
 #endif
 /* End: bn_mp_prime_fermat.c */
 /* default to not */
 *result = MP_NO;
 for (ix = 0; ix < PRIME_SIZE; ix++) {
-/* what is a mod __prime_tab[ix] */
+/* what is a mod ltm_prime_tab[ix] */
-if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
+if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
 return err;
 }
 /* is the residue zero? */
 if (res == 0) {
 return MP_VAL;
 }
 /* is the input equal to one of the primes in the table? */
 for (ix = 0; ix < PRIME_SIZE; ix++) {
-if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
+if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
 *result = 1;
 return MP_OKAY;
 }
 }
 return err;
 }
 for (ix = 0; ix < t; ix++) {
 /* set the prime */
-mp_set (&b, __prime_tab[ix]);
+mp_set (&b, ltm_prime_tab[ix]);
 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
-goto __B;
+goto LBL_B;
 }
 if (res == MP_NO) {
-goto __B;
+goto LBL_B;
 }
 }
 /* passed the test */
 *result = MP_YES;
-__B:mp_clear (&b);
+LBL_B:mp_clear (&b);
 return err;
 }
 #endif
 /* End: bn_mp_prime_is_prime.c */
 /* get n1 = a - 1 */
 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
 return err;
 }
 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
-goto __N1;
+goto LBL_N1;
 }
 /* set 2**s * r = n1 */
 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
-goto __N1;
+goto LBL_N1;
 }
 /* count the number of least significant bits
 * which are zero
 */
 s = mp_cnt_lsb(&r);
 /* now divide n - 1 by 2**s */
 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
-goto __R;
+goto LBL_R;
 }
 /* compute y = b**r mod a */
 if ((err = mp_init (&y)) != MP_OKAY) {
-goto __R;
+goto LBL_R;
 }
 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 /* if y != 1 and y != n1 do */
 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
 j = 1;
 /* while j <= s-1 and y != n1 */
 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
-goto __Y;
+goto LBL_Y;
 }
 /* if y == 1 then composite */
 if (mp_cmp_d (&y, 1) == MP_EQ) {
-goto __Y;
+goto LBL_Y;
 }
 ++j;
 }
 /* if y != n1 then composite */
 if (mp_cmp (&y, &n1) != MP_EQ) {
-goto __Y;
+goto LBL_Y;
 }
 }
 /* probably prime now */
 *result = MP_YES;
-__Y:mp_clear (&y);
+LBL_Y:mp_clear (&y);
-__R:mp_clear (&r);
+LBL_R:mp_clear (&r);
-__N1:mp_clear (&n1);
+LBL_N1:mp_clear (&n1);
 return err;
 }
 #endif
 /* End: bn_mp_prime_miller_rabin.c */
 /* force positive */
 a->sign = MP_ZPOS;
 /* simple algo if a is less than the largest prime in the table */
-if (mp_cmp_d(a, __prime_tab[PRIME_SIZE-1]) == MP_LT) {
+if (mp_cmp_d(a, ltm_prime_tab[PRIME_SIZE-1]) == MP_LT) {
 /* find which prime it is bigger than */
 for (x = PRIME_SIZE - 2; x >= 0; x--) {
-if (mp_cmp_d(a, __prime_tab[x]) != MP_LT) {
+if (mp_cmp_d(a, ltm_prime_tab[x]) != MP_LT) {
 if (bbs_style == 1) {
 /* ok we found a prime smaller or
 * equal [so the next is larger]
 *
 * however, the prime must be
 * congruent to 3 mod 4
 */
-if ((__prime_tab[x + 1] & 3) != 3) {
+if ((ltm_prime_tab[x + 1] & 3) != 3) {
 /* scan upwards for a prime congruent to 3 mod 4 */
 for (y = x + 1; y < PRIME_SIZE; y++) {
-if ((__prime_tab[y] & 3) == 3) {
+if ((ltm_prime_tab[y] & 3) == 3) {
-mp_set(a, __prime_tab[y]);
+mp_set(a, ltm_prime_tab[y]);
 return MP_OKAY;
 }
 }
 }
 } else {
-mp_set(a, __prime_tab[x + 1]);
+mp_set(a, ltm_prime_tab[x + 1]);
 return MP_OKAY;
 }
 }
 }
 /* at this point a maybe 1 */
 }
 }
 /* generate the restable */
 for (x = 1; x < PRIME_SIZE; x++) {
-if ((err = mp_mod_d(a, __prime_tab[x], res_tab + x)) != MP_OKAY) {
+if ((err = mp_mod_d(a, ltm_prime_tab[x], res_tab + x)) != MP_OKAY) {
 return err;
 }
 }
 /* init temp used for Miller-Rabin Testing */
 for (x = 1; x < PRIME_SIZE; x++) {
 /* add the step to each residue */
 res_tab[x] += kstep;
 /* subtract the modulus [instead of using division] */
-if (res_tab[x] >= __prime_tab[x]) {
+if (res_tab[x] >= ltm_prime_tab[x]) {
-res_tab[x]  -= __prime_tab[x];
+res_tab[x]  -= ltm_prime_tab[x];
 }
 /* set flag if zero */
 if (res_tab[x] == 0) {
 y = 1;
 }
 } while (y == 1 && step < ((((mp_digit)1)<<DIGIT_BIT) - kstep));
 /* add the step */
 if ((err = mp_add_d(a, step, a)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 /* if didn't pass sieve and step == MAX then skip test */
 if (y == 1 && step >= ((((mp_digit)1)<<DIGIT_BIT) - kstep)) {
 continue;
 }
 /* is this prime? */
 for (x = 0; x < t; x++) {
-mp_set(&b, __prime_tab[t]);
+mp_set(&b, ltm_prime_tab[t]);
 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
-goto __ERR;
+goto LBL_ERR;
 }
 if (res == MP_NO) {
 break;
 }
 }
 break;
 }
 }
 err = MP_OKAY;
-__ERR:
+LBL_ERR:
 mp_clear(&b);
 return err;
 }
 #endif
 if (flags & LTM_PRIME_SAFE) {
 flags |= LTM_PRIME_BBS;
 }
 /* calc the byte size */
-bsize = (size>>3)+(size&7?1:0);
+bsize = (size>>3) + ((size&7)?1:0);
 /* we need a buffer of bsize bytes */
 tmp = OPT_CAST(unsigned char) XMALLOC(bsize);
 if (tmp == NULL) {
 return MP_MEM;
 }
 /* calc the maskAND value for the MSbyte*/
-maskAND = 0xFF >> (8 - (size & 7));
+maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
 /* calc the maskOR_msb */
 maskOR_msb        = 0;
-maskOR_msb_offset = (size - 2) >> 3;
+maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
 if (flags & LTM_PRIME_2MSB_ON) {
 maskOR_msb     |= 1 << ((size - 2) & 7);
 } else if (flags & LTM_PRIME_2MSB_OFF) {
 maskAND        &= ~(1 << ((size - 2) & 7));
 }
 /* get the maskOR_lsb */
-maskOR_lsb         = 0;
+maskOR_lsb         = 1;
 if (flags & LTM_PRIME_BBS) {
 maskOR_lsb     |= 3;
 }
 do {
 /* make sure the radix is in range */
 if (radix < 2 || radix > 64) {
 return MP_VAL;
 }
+if (mp_iszero(a) == MP_YES) {
+*size = 2;
+return MP_OKAY;
+}
+/* digs is the digit count */
+digs = 0;
+/* if it's negative add one for the sign */
+if (a->sign == MP_NEG) {
+++digs;
+}
 /* init a copy of the input */
 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
 return res;
 }
-/* digs is the digit count */
+/* force temp to positive */
-digs = 0;
+t.sign = MP_ZPOS;
-/* if it's negative add one for the sign */
-if (t.sign == MP_NEG) {
-++digs;
-t.sign = MP_ZPOS;
-}
 /* fetch out all of the digits */
-while (mp_iszero (&t) == 0) {
+while (mp_iszero (&t) == MP_NO) {
 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
 mp_clear (&t);
 return res;
 }
 ++digs;
 return MP_OKAY;
 }
 /* first place a random non-zero digit */
 do {
-d = ((mp_digit) abs (rand ()));
+d = ((mp_digit) abs (rand ())) & MP_MASK;
 } while (d == 0);
 if ((res = mp_add_d (a, d, a)) != MP_OKAY) {
 return res;
 }
-while (digits-- > 0) {
+while (--digits > 0) {
 if ((res = mp_lshd (a, 1)) != MP_OKAY) {
 return res;
 }
 if ((res = mp_add_d (a, ((mp_digit) abs (rand ())), a)) != MP_OKAY) {
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* read a string [ASCII] in a given radix */
-int mp_read_radix (mp_int * a, char *str, int radix)
+int mp_read_radix (mp_int * a, const char *str, int radix)
 {
 int     y, res, neg;
 char    ch;
 /* make sure the radix is ok */
 /* reduces x mod m, assumes 0 < x < m**2, mu is
 * precomputed via mp_reduce_setup.
 * From HAC pp.604 Algorithm 14.42
 */
-int
+int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
-mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
 {
 mp_int  q;
 int     res, um = m->used;
 /* q = x */
 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
 goto CLEANUP;
 }
 } else {
 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
-if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
+if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
 goto CLEANUP;
 }
 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
-if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
+if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
 goto CLEANUP;
 }
 #else
 {
 res = MP_VAL;
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* reduces a modulo n where n is of the form 2**p - d */
-int
+int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
-mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
 {
 mp_int q;
 int    p, res;
 if ((res = mp_init(&q)) != MP_OKAY) {
 #endif
 /* End: bn_mp_reduce_2k.c */
+/* Start: bn_mp_reduce_2k_l.c */
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+*
+* LibTomMath is a library that provides multiple-precision
+* integer arithmetic as well as number theoretic functionality.
+*
+* The library was designed directly after the MPI library by
+* Michael Fromberger but has been written from scratch with
+* additional optimizations in place.
+*
+* The library is free for all purposes without any express
+* guarantee it works.
+*
+* Tom St Denis, [email protected], http://math.libtomcrypt.org
+*/
+/* reduces a modulo n where n is of the form 2**p - d
+ * This differs from reduce_2k since "d" can be larger
+ * than a single digit.
+ *
+ * With p = mp_count_bits(n), each pass folds the high part of a
+ * back into the low part:  a = (a mod 2**p) + (a / 2**p) * d,
+ * and repeats for as long as |a| >= |n| (subtracting n first).
+ *
+ * a [in/out] value to reduce; overwritten with the result
+ * n [in]     modulus
+ * d [in]     multiplier applied to the high part on every pass
+ *
+ * Returns MP_OKAY on success, otherwise the error code of the
+ * first helper call that failed.
+ */
+int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
+{
+   mp_int q;
+   int    p, res;
+
+   if ((res = mp_init(&q)) != MP_OKAY) {
+      return res;
+   }
+
+   p = mp_count_bits(n);
+top:
+   /* q = a/2**p, a = a mod 2**p */
+   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   /* q = q * d */
+   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   /* a = a + q */
+   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
+      goto ERR;
+   }
+
+   if (mp_cmp_mag(a, n) != MP_LT) {
+      /* a failed subtraction must be reported; looping again on an
+       * unreduced a would hide the error from the caller
+       */
+      if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
+         goto ERR;
+      }
+      goto top;
+   }
+
+ERR:
+   /* reached on success as well: res still holds MP_OKAY from s_mp_add */
+   mp_clear(&q);
+   return res;
+}
+#endif
+/* End: bn_mp_reduce_2k_l.c */
 /* Start: bn_mp_reduce_2k_setup.c */
 #include <tommath.h>
 #ifdef BN_MP_REDUCE_2K_SETUP_C
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* determines the setup value */
-int
+int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
-mp_reduce_2k_setup(mp_int *a, mp_digit *d)
 {
 int res, p;
 mp_int tmp;
 if ((res = mp_init(&tmp)) != MP_OKAY) {
 }
 #endif
 /* End: bn_mp_reduce_2k_setup.c */
+/* Start: bn_mp_reduce_2k_setup_l.c */
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_2K_SETUP_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+*
+* LibTomMath is a library that provides multiple-precision
+* integer arithmetic as well as number theoretic functionality.
+*
+* The library was designed directly after the MPI library by
+* Michael Fromberger but has been written from scratch with
+* additional optimizations in place.
+*
+* The library is free for all purposes without any express
+* guarantee it works.
+*
+* Tom St Denis, [email protected], http://math.libtomcrypt.org
+*/
+/* determines the setup value: d = 2**mp_count_bits(a) - a
+ *
+ * a [in]  value the setup is computed for
+ * d [out] receives the difference
+ *
+ * Returns MP_OKAY on success, otherwise the error code of the
+ * helper call that failed.
+ */
+int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
+{
+   mp_int pow2;
+   int    err;
+
+   err = mp_init(&pow2);
+   if (err != MP_OKAY) {
+      return err;
+   }
+
+   /* pow2 = 2**(bit length of a) */
+   err = mp_2expt(&pow2, mp_count_bits(a));
+   if (err == MP_OKAY) {
+      /* d = pow2 - a */
+      err = s_mp_sub(&pow2, a, d);
+   }
+
+   /* the temporary is released on every path past mp_init */
+   mp_clear(&pow2);
+   return err;
+}
+#endif
+/* End: bn_mp_reduce_2k_setup_l.c */
 /* Start: bn_mp_reduce_is_2k.c */
 #include <tommath.h>
 #ifdef BN_MP_REDUCE_IS_2K_C
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 {
 int ix, iy, iw;
 mp_digit iz;
 if (a->used == 0) {
-return 0;
+return MP_NO;
 } else if (a->used == 1) {
-return 1;
+return MP_YES;
 } else if (a->used > 1) {
 iy = mp_count_bits(a);
 iz = 1;
 iw = 1;
 /* Test every bit from the second digit up, must be 1 */
 for (ix = DIGIT_BIT; ix < iy; ix++) {
 if ((a->dp[iw] & iz) == 0) {
-return 0;
+return MP_NO;
 }
 iz <<= 1;
 if (iz > (mp_digit)MP_MASK) {
 ++iw;
 iz = 1;
 }
 }
 }
-return 1;
+return MP_YES;
 }
 #endif
 /* End: bn_mp_reduce_is_2k.c */
+/* Start: bn_mp_reduce_is_2k_l.c */
+#include <tommath.h>
+#ifdef BN_MP_REDUCE_IS_2K_L_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+*
+* LibTomMath is a library that provides multiple-precision
+* integer arithmetic as well as number theoretic functionality.
+*
+* The library was designed directly after the MPI library by
+* Michael Fromberger but has been written from scratch with
+* additional optimizations in place.
+*
+* The library is free for all purposes without any express
+* guarantee it works.
+*
+* Tom St Denis, [email protected], http://math.libtomcrypt.org
+*/
+/* determines if reduce_2k_l can be used
+ *
+ * Returns MP_NO when a has no digits in use, MP_YES when it has
+ * exactly one, and otherwise MP_YES only if the number of digits
+ * equal to MP_MASK is at least a->used/2 (integer division).
+ */
+int mp_reduce_is_2k_l(mp_int *a)
+{
+   int idx, full;
+
+   if (a->used <= 0) {
+      return MP_NO;
+   }
+   if (a->used == 1) {
+      return MP_YES;
+   }
+
+   /* count the digits that have every bit set */
+   full = 0;
+   idx  = 0;
+   while (idx < a->used) {
+      if (a->dp[idx] == MP_MASK) {
+         ++full;
+      }
+      ++idx;
+   }
+
+   if (full >= (a->used/2)) {
+      return MP_YES;
+   }
+   return MP_NO;
+}
+#endif
+/* End: bn_mp_reduce_is_2k_l.c */
 /* Start: bn_mp_reduce_setup.c */
 #include <tommath.h>
 #ifdef BN_MP_REDUCE_SETUP_C
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* store in signed [big endian] format */
-int
+int mp_to_signed_bin (mp_int * a, unsigned char *b)
-mp_to_signed_bin (mp_int * a, unsigned char *b)
 {
 int     res;
 if ((res = mp_to_unsigned_bin (a, b + 1)) != MP_OKAY) {
 return res;
 }
 #endif
 /* End: bn_mp_to_signed_bin.c */
+/* Start: bn_mp_to_signed_bin_n.c */
+#include <tommath.h>
+#ifdef BN_MP_TO_SIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+*
+* LibTomMath is a library that provides multiple-precision
+* integer arithmetic as well as number theoretic functionality.
+*
+* The library was designed directly after the MPI library by
+* Michael Fromberger but has been written from scratch with
+* additional optimizations in place.
+*
+* The library is free for all purposes without any express
+* guarantee it works.
+*
+* Tom St Denis, [email protected], http://math.libtomcrypt.org
+*/
+/* store in signed [big endian] format, with a capacity check
+ *
+ * a      [in]     value to store
+ * b      [out]    destination buffer
+ * outlen [in/out] on entry the capacity of b; on success it is
+ *                 set to mp_signed_bin_size(a)
+ *
+ * Returns MP_VAL when *outlen is smaller than the required size,
+ * otherwise whatever mp_to_signed_bin returns.
+ */
+int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+   unsigned long need = (unsigned long)mp_signed_bin_size(a);
+
+   if (need > *outlen) {
+      return MP_VAL;
+   }
+
+   *outlen = need;
+   return mp_to_signed_bin(a, b);
+}
+#endif
+/* End: bn_mp_to_signed_bin_n.c */
 /* Start: bn_mp_to_unsigned_bin.c */
 #include <tommath.h>
 #ifdef BN_MP_TO_UNSIGNED_BIN_C
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* store in unsigned [big endian] format */
-int
+int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
-mp_to_unsigned_bin (mp_int * a, unsigned char *b)
 {
 int     x, res;
 mp_int  t;
 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
 }
 #endif
 /* End: bn_mp_to_unsigned_bin.c */
+/* Start: bn_mp_to_unsigned_bin_n.c */
+#include <tommath.h>
+#ifdef BN_MP_TO_UNSIGNED_BIN_N_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis
+*
+* LibTomMath is a library that provides multiple-precision
+* integer arithmetic as well as number theoretic functionality.
+*
+* The library was designed directly after the MPI library by
+* Michael Fromberger but has been written from scratch with
+* additional optimizations in place.
+*
+* The library is free for all purposes without any express
+* guarantee it works.
+*
+* Tom St Denis, [email protected], http://math.libtomcrypt.org
+*/
+/* store in unsigned [big endian] format, with a capacity check
+ *
+ * a      [in]     value to store
+ * b      [out]    destination buffer
+ * outlen [in/out] on entry the capacity of b; on success it is
+ *                 set to mp_unsigned_bin_size(a)
+ *
+ * Returns MP_VAL when *outlen is smaller than the required size,
+ * otherwise whatever mp_to_unsigned_bin returns.
+ */
+int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
+{
+   unsigned long need = (unsigned long)mp_unsigned_bin_size(a);
+
+   if (need > *outlen) {
+      return MP_VAL;
+   }
+
+   *outlen = need;
+   return mp_to_unsigned_bin(a, b);
+}
+#endif
+/* End: bn_mp_to_unsigned_bin_n.c */
 /* Start: bn_mp_toom_mul.c */
 #include <tommath.h>
 #ifdef BN_MP_TOOM_MUL_C
 /* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* multiplication using the Toom-Cook 3-way algorithm
 *
-* Much more complicated than Karatsuba but has a lower asymptotic running time of
+* Much more complicated than Karatsuba but has a lower
-* O(N**1.464).  This algorithm is only particularly useful on VERY large
+* asymptotic running time of O(N**1.464).  This algorithm is
-* inputs (we're talking 1000s of digits here...).
+* only particularly useful on VERY large inputs
+* (we're talking 1000s of digits here...).
 */
 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
 {
 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
 int res, B;
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* get the size for an unsigned equivalent */
-int
+int mp_unsigned_bin_size (mp_int * a)
-mp_unsigned_bin_size (mp_int * a)
 {
 int     size = mp_count_bits (a);
 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
 }
 #endif
 px = a->used;
 x = a;
 }
 for (ix = 0; ix < px; ix++) {
+t.dp[ix] ^= x->dp[ix];
 }
 mp_clamp (&t);
 mp_exch (c, &t);
 mp_clear (&t);
 return MP_OKAY;
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* set to zero */
-void
+void mp_zero (mp_int * a)
-mp_zero (mp_int * a)
+{
-{
+int       n;
+mp_digit *tmp;
 a->sign = MP_ZPOS;
 a->used = 0;
-memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
+tmp = a->dp;
+for (n = 0; n < a->alloc; n++) {
+*tmp++ = 0;
+}
 }
 #endif
 /* End: bn_mp_zero.c */
 * The library is free for all purposes without any express
 * guarantee it works.
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
-const mp_digit __prime_tab[] = {
+const mp_digit ltm_prime_tab[] = {
 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
 #ifndef MP_8BIT
 #define TAB_SIZE 32
 #else
 #define TAB_SIZE 256
 #endif
-int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
+int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
 {
 mp_int  M[TAB_SIZE], res, mu;
 mp_digit buf;
 int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
+int (*redux)(mp_int*,mp_int*,mp_int*);
 /* find window size */
 x = mp_count_bits (X);
 if (x <= 7) {
 winsize = 2;
 }
 }
 /* create mu, used for Barrett reduction */
 if ((err = mp_init (&mu)) != MP_OKAY) {
-goto __M;
+goto LBL_M;
 }
-if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
-goto __MU;
+if (redmode == 0) {
-}
+if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
+goto LBL_MU;
+}
+redux = mp_reduce;
+} else {
+if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
+goto LBL_MU;
+}
+redux = mp_reduce_2k_l;
+}
 /* create M table
 *
 * The M table contains powers of the base,
 * e.g. M[x] = G**x mod P
 *
 * The first half of the table is not
 * computed though, except for M[0] and M[1]
 */
 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
 /* compute the value at M[1<<(winsize-1)] by squaring
 * M[1] (winsize-1) times
 */
 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
 for (x = 0; x < (winsize - 1); x++) {
+/* square it */
 if ((err = mp_sqr (&M[1 << (winsize - 1)],
 &M[1 << (winsize - 1)])) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
-if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
-goto __MU;
+/* reduce modulo P */
+if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
+goto LBL_MU;
 }
 }
 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
 */
 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
-if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
+if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
 }
 /* setup result */
 if ((err = mp_init (&res)) != MP_OKAY) {
-goto __MU;
+goto LBL_MU;
 }
 mp_set (&res, 1);
 /* set initial mode and bit cnt */
 mode   = 0;
 }
 /* if the bit is zero and mode == 1 then we square */
 if (mode == 1 && y == 0) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
-if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+if ((err = redux (&res, P, &mu)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 continue;
 }
 /* else we add it to the window */
 if (bitcpy == winsize) {
 /* ok window is filled so square as required and multiply  */
 /* square first */
 for (x = 0; x < winsize; x++) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
-if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+if ((err = redux (&res, P, &mu)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 /* then multiply */
 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
-if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+if ((err = redux (&res, P, &mu)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 /* empty window and reset */
 bitcpy = 0;
 bitbuf = 0;
 /* if bits remain then square/multiply */
 if (mode == 2 && bitcpy > 0) {
 /* square then multiply if the bit is set */
 for (x = 0; x < bitcpy; x++) {
 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
-if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+if ((err = redux (&res, P, &mu)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 bitbuf <<= 1;
 if ((bitbuf & (1 << winsize)) != 0) {
 /* then multiply */
 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
-if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
+if ((err = redux (&res, P, &mu)) != MP_OKAY) {
-goto __RES;
+goto LBL_RES;
 }
 }
 }
 }
 mp_exch (&res, Y);
 err = MP_OKAY;
-__RES:mp_clear (&res);
+LBL_RES:mp_clear (&res);
-__MU:mp_clear (&mu);
+LBL_MU:mp_clear (&mu);
-__M:
+LBL_M:
 mp_clear(&M[1]);
 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
 mp_clear (&M[x]);
 }
 return err;
 /* multiplies |a| * |b| and only computes up to digs digits of result
 * HAC pp. 595, Algorithm 14.12  Modified so you can control how
 * many digits of output are created.
 */
-int
+int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
-s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 {
 mp_int  t;
 int     res, pa, pb, ix, iy;
 mp_digit u;
 mp_word r;
 *
 * Tom St Denis, [email protected], http://math.libtomcrypt.org
 */
 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
-int
+int s_mp_sqr (mp_int * a, mp_int * b)
-s_mp_sqr (mp_int * a, mp_int * b)
 {
 mp_int  t;
 int     res, ix, iy, pa;
 mp_word r;
 mp_digit u, tmpx, *tmpt;
 /* Known optimal configurations
 CPU                    /Compiler     /MUL CUTOFF/SQR CUTOFF
 -------------------------------------------------------------
 Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
+AMD Athlon64           /GCC v3.4.4   /        74/       124/LTM 0.34
 */
-int     KARATSUBA_MUL_CUTOFF = 88,      /* Min. number of digits before Karatsuba multiplication is used. */
+int     KARATSUBA_MUL_CUTOFF = 74,      /* Min. number of digits before Karatsuba multiplication is used. */
-KARATSUBA_SQR_CUTOFF = 128,     /* Min. number of digits before Karatsuba squaring is used. */
+KARATSUBA_SQR_CUTOFF = 124,     /* Min. number of digits before Karatsuba squaring is used. */
 TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
 TOOM_SQR_CUTOFF      = 400;
 #endif

Mercurial > dropbear

comparison pre_gen/mpi.c @ 190:d8254fc979e9 libtommath-orig LTM_0.35