Mercurial > dropbear
annotate libtommath/bn_s_mp_sqr_fast.c @ 1938:77bc00dcc19f default tip main master
Bump version to 2022.82
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Fri, 01 Apr 2022 14:43:27 +0800 |
parents | 1051e4eea25a |
children |
rev | line source |
---|---|
1692
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
1 #include "tommath_private.h" |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
2 #ifdef BN_S_MP_SQR_FAST_C |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
4 /* SPDX-License-Identifier: Unlicense */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
5 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
6 /* the jist of squaring... |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
7 * you do like mult except the offset of the tmpx [one that |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
8 * starts closer to zero] can't equal the offset of tmpy. |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
9 * So basically you set up iy like before then you min it with |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
10 * (ty-tx) so that it never happens. You double all those |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
11 * you add in the inner loop |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
12 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
13 After that loop you do the squares and add them in. |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
14 */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
15 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
16 mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b) |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
17 { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
18 int olduse, pa, ix, iz; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
19 mp_digit W[MP_WARRAY], *tmpx; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
20 mp_word W1; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
21 mp_err err; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
22 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
23 /* grow the destination as required */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
24 pa = a->used + a->used; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
25 if (b->alloc < pa) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
26 if ((err = mp_grow(b, pa)) != MP_OKAY) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
27 return err; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
28 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
29 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
30 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
31 /* number of output digits to produce */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
32 W1 = 0; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
33 for (ix = 0; ix < pa; ix++) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
34 int tx, ty, iy; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
35 mp_word _W; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
36 mp_digit *tmpy; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
37 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
38 /* clear counter */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
39 _W = 0; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
40 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
41 /* get offsets into the two bignums */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
42 ty = MP_MIN(a->used-1, ix); |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
43 tx = ix - ty; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
44 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
45 /* setup temp aliases */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
46 tmpx = a->dp + tx; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
47 tmpy = a->dp + ty; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
48 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
49 /* this is the number of times the loop will iterrate, essentially |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
50 while (tx++ < a->used && ty-- >= 0) { ... } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
51 */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
52 iy = MP_MIN(a->used-tx, ty+1); |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
53 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
54 /* now for squaring tx can never equal ty |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
55 * we halve the distance since they approach at a rate of 2x |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
56 * and we have to round because odd cases need to be executed |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
57 */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
58 iy = MP_MIN(iy, ((ty-tx)+1)>>1); |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
59 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
60 /* execute loop */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
61 for (iz = 0; iz < iy; iz++) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
62 _W += (mp_word)*tmpx++ * (mp_word)*tmpy--; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
63 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
64 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
65 /* double the inner product and add carry */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
66 _W = _W + _W + W1; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
67 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
68 /* even columns have the square term in them */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
69 if (((unsigned)ix & 1u) == 0u) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
70 _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1]; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
71 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
72 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
73 /* store it */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
74 W[ix] = (mp_digit)_W & MP_MASK; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
75 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
76 /* make next carry */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
77 W1 = _W >> (mp_word)MP_DIGIT_BIT; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
78 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
79 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
80 /* setup dest */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
81 olduse = b->used; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
82 b->used = a->used+a->used; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
83 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
84 { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
85 mp_digit *tmpb; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
86 tmpb = b->dp; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
87 for (ix = 0; ix < pa; ix++) { |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
88 *tmpb++ = W[ix] & MP_MASK; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
89 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
90 |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
91 /* clear unused digits [that existed in the old copy of c] */ |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
92 MP_ZERO_DIGITS(tmpb, olduse - ix); |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
93 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
94 mp_clamp(b); |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
95 return MP_OKAY; |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
96 } |
1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff
changeset
|
97 #endif |