Mercurial > dropbear
comparison libtommath/bn_s_mp_montgomery_reduce_fast.c @ 1692:1051e4eea25a
Update LibTomMath to 1.2.0 (#84)
* update C files
* update other files
* update headers
* update makefiles
* remove mp_set/get_double()
* use ltm 1.2.0 API
* update ltm_desc
* use bundled tommath if system-tommath is too old
* XMALLOC etc. were changed to MP_MALLOC etc.
author | Steffen Jaeckel <s@jaeckel.eu> |
---|---|
date | Tue, 26 May 2020 17:36:47 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1691:2d3745d58843 | 1692:1051e4eea25a |
---|---|
1 #include "tommath_private.h" | |
2 #ifdef BN_S_MP_MONTGOMERY_REDUCE_FAST_C | |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */ | |
4 /* SPDX-License-Identifier: Unlicense */ | |
5 | |
6 /* computes xR**-1 == x (mod N) via Montgomery Reduction | |
7 * | |
8 * This is an optimized implementation of montgomery_reduce | |
9 * which uses the comba method to quickly calculate the columns of the | |
10 * reduction. | |
11 * | |
12 * Based on Algorithm 14.32 on pp.601 of HAC. | |
13 */ | |
14 mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho) | |
15 { | |
16 int ix, olduse; | |
17 mp_err err; | |
18 mp_word W[MP_WARRAY]; | |
19 | |
20 if (x->used > MP_WARRAY) { | |
21 return MP_VAL; | |
22 } | |
23 | |
24 /* get old used count */ | |
25 olduse = x->used; | |
26 | |
27 /* grow a as required */ | |
28 if (x->alloc < (n->used + 1)) { | |
29 if ((err = mp_grow(x, n->used + 1)) != MP_OKAY) { | |
30 return err; | |
31 } | |
32 } | |
33 | |
34 /* first we have to get the digits of the input into | |
35 * an array of double precision words W[...] | |
36 */ | |
37 { | |
38 mp_word *_W; | |
39 mp_digit *tmpx; | |
40 | |
41 /* alias for the W[] array */ | |
42 _W = W; | |
43 | |
44 /* alias for the digits of x*/ | |
45 tmpx = x->dp; | |
46 | |
47 /* copy the digits of a into W[0..a->used-1] */ | |
48 for (ix = 0; ix < x->used; ix++) { | |
49 *_W++ = *tmpx++; | |
50 } | |
51 | |
52 /* zero the high words of W[a->used..m->used*2] */ | |
53 if (ix < ((n->used * 2) + 1)) { | |
54 MP_ZERO_BUFFER(_W, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix)); | |
55 } | |
56 } | |
57 | |
58 /* now we proceed to zero successive digits | |
59 * from the least significant upwards | |
60 */ | |
61 for (ix = 0; ix < n->used; ix++) { | |
62 /* mu = ai * m' mod b | |
63 * | |
64 * We avoid a double precision multiplication (which isn't required) | |
65 * by casting the value down to a mp_digit. Note this requires | |
66 * that W[ix-1] have the carry cleared (see after the inner loop) | |
67 */ | |
68 mp_digit mu; | |
69 mu = ((W[ix] & MP_MASK) * rho) & MP_MASK; | |
70 | |
71 /* a = a + mu * m * b**i | |
72 * | |
73 * This is computed in place and on the fly. The multiplication | |
74 * by b**i is handled by offseting which columns the results | |
75 * are added to. | |
76 * | |
77 * Note the comba method normally doesn't handle carries in the | |
78 * inner loop In this case we fix the carry from the previous | |
79 * column since the Montgomery reduction requires digits of the | |
80 * result (so far) [see above] to work. This is | |
81 * handled by fixing up one carry after the inner loop. The | |
82 * carry fixups are done in order so after these loops the | |
83 * first m->used words of W[] have the carries fixed | |
84 */ | |
85 { | |
86 int iy; | |
87 mp_digit *tmpn; | |
88 mp_word *_W; | |
89 | |
90 /* alias for the digits of the modulus */ | |
91 tmpn = n->dp; | |
92 | |
93 /* Alias for the columns set by an offset of ix */ | |
94 _W = W + ix; | |
95 | |
96 /* inner loop */ | |
97 for (iy = 0; iy < n->used; iy++) { | |
98 *_W++ += (mp_word)mu * (mp_word)*tmpn++; | |
99 } | |
100 } | |
101 | |
102 /* now fix carry for next digit, W[ix+1] */ | |
103 W[ix + 1] += W[ix] >> (mp_word)MP_DIGIT_BIT; | |
104 } | |
105 | |
106 /* now we have to propagate the carries and | |
107 * shift the words downward [all those least | |
108 * significant digits we zeroed]. | |
109 */ | |
110 { | |
111 mp_digit *tmpx; | |
112 mp_word *_W, *_W1; | |
113 | |
114 /* nox fix rest of carries */ | |
115 | |
116 /* alias for current word */ | |
117 _W1 = W + ix; | |
118 | |
119 /* alias for next word, where the carry goes */ | |
120 _W = W + ++ix; | |
121 | |
122 for (; ix < ((n->used * 2) + 1); ix++) { | |
123 *_W++ += *_W1++ >> (mp_word)MP_DIGIT_BIT; | |
124 } | |
125 | |
126 /* copy out, A = A/b**n | |
127 * | |
128 * The result is A/b**n but instead of converting from an | |
129 * array of mp_word to mp_digit than calling mp_rshd | |
130 * we just copy them in the right order | |
131 */ | |
132 | |
133 /* alias for destination word */ | |
134 tmpx = x->dp; | |
135 | |
136 /* alias for shifted double precision result */ | |
137 _W = W + n->used; | |
138 | |
139 for (ix = 0; ix < (n->used + 1); ix++) { | |
140 *tmpx++ = *_W++ & (mp_word)MP_MASK; | |
141 } | |
142 | |
143 /* zero oldused digits, if the input a was larger than | |
144 * m->used+1 we'll have to clear the digits | |
145 */ | |
146 MP_ZERO_DIGITS(tmpx, olduse - ix); | |
147 } | |
148 | |
149 /* set the max used and clamp */ | |
150 x->used = n->used + 1; | |
151 mp_clamp(x); | |
152 | |
153 /* if A >= m then A = A - m */ | |
154 if (mp_cmp_mag(x, n) != MP_LT) { | |
155 return s_mp_sub(x, n, x); | |
156 } | |
157 return MP_OKAY; | |
158 } | |
159 #endif |