annotate libtommath/bn_s_mp_sqr_fast.c @ 1938:77bc00dcc19f default tip main master

Bump version to 2022.82
author Matt Johnston <matt@ucc.asn.au>
date Fri, 01 Apr 2022 14:43:27 +0800
parents 1051e4eea25a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1692
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
1 #include "tommath_private.h"
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
2 #ifdef BN_S_MP_SQR_FAST_C
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
4 /* SPDX-License-Identifier: Unlicense */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
5
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
6 /* the jist of squaring...
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
7 * you do like mult except the offset of the tmpx [one that
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
8 * starts closer to zero] can't equal the offset of tmpy.
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
9 * So basically you set up iy like before then you min it with
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
10 * (ty-tx) so that it never happens. You double all those
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
11 * you add in the inner loop
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
12
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
13 After that loop you do the squares and add them in.
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
14 */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
15
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
16 mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b)
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
17 {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
18 int olduse, pa, ix, iz;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
19 mp_digit W[MP_WARRAY], *tmpx;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
20 mp_word W1;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
21 mp_err err;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
22
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
23 /* grow the destination as required */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
24 pa = a->used + a->used;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
25 if (b->alloc < pa) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
26 if ((err = mp_grow(b, pa)) != MP_OKAY) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
27 return err;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
28 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
29 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
30
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
31 /* number of output digits to produce */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
32 W1 = 0;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
33 for (ix = 0; ix < pa; ix++) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
34 int tx, ty, iy;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
35 mp_word _W;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
36 mp_digit *tmpy;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
37
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
38 /* clear counter */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
39 _W = 0;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
40
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
41 /* get offsets into the two bignums */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
42 ty = MP_MIN(a->used-1, ix);
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
43 tx = ix - ty;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
44
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
45 /* setup temp aliases */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
46 tmpx = a->dp + tx;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
47 tmpy = a->dp + ty;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
48
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
49 /* this is the number of times the loop will iterrate, essentially
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
50 while (tx++ < a->used && ty-- >= 0) { ... }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
51 */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
52 iy = MP_MIN(a->used-tx, ty+1);
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
53
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
54 /* now for squaring tx can never equal ty
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
55 * we halve the distance since they approach at a rate of 2x
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
56 * and we have to round because odd cases need to be executed
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
57 */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
58 iy = MP_MIN(iy, ((ty-tx)+1)>>1);
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
59
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
60 /* execute loop */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
61 for (iz = 0; iz < iy; iz++) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
62 _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
63 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
64
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
65 /* double the inner product and add carry */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
66 _W = _W + _W + W1;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
67
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
68 /* even columns have the square term in them */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
69 if (((unsigned)ix & 1u) == 0u) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
70 _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
71 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
72
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
73 /* store it */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
74 W[ix] = (mp_digit)_W & MP_MASK;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
75
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
76 /* make next carry */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
77 W1 = _W >> (mp_word)MP_DIGIT_BIT;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
78 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
79
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
80 /* setup dest */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
81 olduse = b->used;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
82 b->used = a->used+a->used;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
83
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
84 {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
85 mp_digit *tmpb;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
86 tmpb = b->dp;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
87 for (ix = 0; ix < pa; ix++) {
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
88 *tmpb++ = W[ix] & MP_MASK;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
89 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
90
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
91 /* clear unused digits [that existed in the old copy of c] */
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
92 MP_ZERO_DIGITS(tmpb, olduse - ix);
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
93 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
94 mp_clamp(b);
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
95 return MP_OKAY;
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
96 }
1051e4eea25a Update LibTomMath to 1.2.0 (#84)
Steffen Jaeckel <s@jaeckel.eu>
parents:
diff changeset
97 #endif