Mercurial > dropbear
comparison libtommath/bn_fast_mp_montgomery_reduce.c @ 1655:f52919ffd3b1
update ltm to 1.1.0 and enable FIPS 186.4 compliant key-generation (#79)
* make key-generation compliant to FIPS 186.4
* fix includes in tommath_class.h
* update fuzzcorpus instead of error-out
* fixup fuzzing make-targets
* update Makefile.in
* apply necessary patches to ltm sources
* clean-up not required ltm files
* update to vanilla ltm 1.1.0
this already only contains the required files
* remove set/get double
author | Steffen Jaeckel <s_jaeckel@gmx.de> |
---|---|
date | Mon, 16 Sep 2019 15:50:38 +0200 |
parents | 8bba51a55704 |
children |
comparison
equal
deleted
inserted
replaced
1654:cc0fc5131c5c | 1655:f52919ffd3b1 |
---|---|
1 #include <tommath_private.h> | 1 #include "tommath_private.h" |
2 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C | 2 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | 3 /* LibTomMath, multiple-precision integer library -- Tom St Denis |
4 * | 4 * |
5 * LibTomMath is a library that provides multiple-precision | 5 * LibTomMath is a library that provides multiple-precision |
6 * integer arithmetic as well as number theoretic functionality. | 6 * integer arithmetic as well as number theoretic functionality. |
7 * | 7 * |
8 * The library was designed directly after the MPI library by | 8 * The library was designed directly after the MPI library by |
9 * Michael Fromberger but has been written from scratch with | 9 * Michael Fromberger but has been written from scratch with |
10 * additional optimizations in place. | 10 * additional optimizations in place. |
11 * | 11 * |
12 * The library is free for all purposes without any express | 12 * SPDX-License-Identifier: Unlicense |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, [email protected], http://libtom.org | |
16 */ | 13 */ |
17 | 14 |
18 /* computes xR**-1 == x (mod N) via Montgomery Reduction | 15 /* computes xR**-1 == x (mod N) via Montgomery Reduction |
19 * | 16 * |
20 * This is an optimized implementation of montgomery_reduce | 17 * This is an optimized implementation of montgomery_reduce |
21 * which uses the comba method to quickly calculate the columns of the | 18 * which uses the comba method to quickly calculate the columns of the |
22 * reduction. | 19 * reduction. |
23 * | 20 * |
24 * Based on Algorithm 14.32 on pp.601 of HAC. | 21 * Based on Algorithm 14.32 on pp.601 of HAC. |
25 */ | 22 */ |
26 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) | 23 int fast_mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho) |
27 { | 24 { |
28 int ix, res, olduse; | 25 int ix, res, olduse; |
29 mp_word W[MP_WARRAY]; | 26 mp_word W[MP_WARRAY]; |
30 | 27 |
31 /* get old used count */ | 28 if (x->used > (int)MP_WARRAY) { |
32 olduse = x->used; | 29 return MP_VAL; |
30 } | |
33 | 31 |
34 /* grow a as required */ | 32 /* get old used count */ |
35 if (x->alloc < (n->used + 1)) { | 33 olduse = x->used; |
36 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { | |
37 return res; | |
38 } | |
39 } | |
40 | 34 |
41 /* first we have to get the digits of the input into | 35 /* grow a as required */ |
42 * an array of double precision words W[...] | 36 if (x->alloc < (n->used + 1)) { |
43 */ | 37 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) { |
44 { | 38 return res; |
45 mp_word *_W; | 39 } |
46 mp_digit *tmpx; | 40 } |
47 | 41 |
48 /* alias for the W[] array */ | 42 /* first we have to get the digits of the input into |
49 _W = W; | 43 * an array of double precision words W[...] |
44 */ | |
45 { | |
46 mp_word *_W; | |
47 mp_digit *tmpx; | |
50 | 48 |
51 /* alias for the digits of x*/ | 49 /* alias for the W[] array */ |
52 tmpx = x->dp; | 50 _W = W; |
53 | 51 |
54 /* copy the digits of a into W[0..a->used-1] */ | 52 /* alias for the digits of x*/ |
55 for (ix = 0; ix < x->used; ix++) { | 53 tmpx = x->dp; |
56 *_W++ = *tmpx++; | |
57 } | |
58 | 54 |
59 /* zero the high words of W[a->used..m->used*2] */ | 55 /* copy the digits of a into W[0..a->used-1] */ |
60 for (; ix < ((n->used * 2) + 1); ix++) { | 56 for (ix = 0; ix < x->used; ix++) { |
61 *_W++ = 0; | 57 *_W++ = *tmpx++; |
62 } | 58 } |
63 } | |
64 | 59 |
65 /* now we proceed to zero successive digits | 60 /* zero the high words of W[a->used..m->used*2] */ |
66 * from the least significant upwards | 61 for (; ix < ((n->used * 2) + 1); ix++) { |
67 */ | 62 *_W++ = 0; |
68 for (ix = 0; ix < n->used; ix++) { | 63 } |
69 /* mu = ai * m' mod b | 64 } |
70 * | |
71 * We avoid a double precision multiplication (which isn't required) | |
72 * by casting the value down to a mp_digit. Note this requires | |
73 * that W[ix-1] have the carry cleared (see after the inner loop) | |
74 */ | |
75 mp_digit mu; | |
76 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); | |
77 | 65 |
78 /* a = a + mu * m * b**i | 66 /* now we proceed to zero successive digits |
79 * | 67 * from the least significant upwards |
80 * This is computed in place and on the fly. The multiplication | 68 */ |
81 * by b**i is handled by offseting which columns the results | 69 for (ix = 0; ix < n->used; ix++) { |
82 * are added to. | 70 /* mu = ai * m' mod b |
83 * | 71 * |
84 * Note the comba method normally doesn't handle carries in the | 72 * We avoid a double precision multiplication (which isn't required) |
85 * inner loop In this case we fix the carry from the previous | 73 * by casting the value down to a mp_digit. Note this requires |
86 * column since the Montgomery reduction requires digits of the | 74 * that W[ix-1] have the carry cleared (see after the inner loop) |
87 * result (so far) [see above] to work. This is | 75 */ |
88 * handled by fixing up one carry after the inner loop. The | 76 mp_digit mu; |
89 * carry fixups are done in order so after these loops the | 77 mu = ((W[ix] & MP_MASK) * rho) & MP_MASK; |
90 * first m->used words of W[] have the carries fixed | |
91 */ | |
92 { | |
93 int iy; | |
94 mp_digit *tmpn; | |
95 mp_word *_W; | |
96 | 78 |
97 /* alias for the digits of the modulus */ | 79 /* a = a + mu * m * b**i |
98 tmpn = n->dp; | 80 * |
81 * This is computed in place and on the fly. The multiplication | |
82 * by b**i is handled by offseting which columns the results | |
83 * are added to. | |
84 * | |
85 * Note the comba method normally doesn't handle carries in the | |
86 * inner loop In this case we fix the carry from the previous | |
87 * column since the Montgomery reduction requires digits of the | |
88 * result (so far) [see above] to work. This is | |
89 * handled by fixing up one carry after the inner loop. The | |
90 * carry fixups are done in order so after these loops the | |
91 * first m->used words of W[] have the carries fixed | |
92 */ | |
93 { | |
94 int iy; | |
95 mp_digit *tmpn; | |
96 mp_word *_W; | |
99 | 97 |
100 /* Alias for the columns set by an offset of ix */ | 98 /* alias for the digits of the modulus */ |
101 _W = W + ix; | 99 tmpn = n->dp; |
102 | 100 |
103 /* inner loop */ | 101 /* Alias for the columns set by an offset of ix */ |
104 for (iy = 0; iy < n->used; iy++) { | 102 _W = W + ix; |
105 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); | 103 |
104 /* inner loop */ | |
105 for (iy = 0; iy < n->used; iy++) { | |
106 *_W++ += (mp_word)mu * (mp_word)*tmpn++; | |
107 } | |
106 } | 108 } |
107 } | |
108 | 109 |
109 /* now fix carry for next digit, W[ix+1] */ | 110 /* now fix carry for next digit, W[ix+1] */ |
110 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); | 111 W[ix + 1] += W[ix] >> (mp_word)DIGIT_BIT; |
111 } | 112 } |
112 | 113 |
113 /* now we have to propagate the carries and | 114 /* now we have to propagate the carries and |
114 * shift the words downward [all those least | 115 * shift the words downward [all those least |
115 * significant digits we zeroed]. | 116 * significant digits we zeroed]. |
116 */ | 117 */ |
117 { | 118 { |
118 mp_digit *tmpx; | 119 mp_digit *tmpx; |
119 mp_word *_W, *_W1; | 120 mp_word *_W, *_W1; |
120 | 121 |
121 /* nox fix rest of carries */ | 122 /* nox fix rest of carries */ |
122 | 123 |
123 /* alias for current word */ | 124 /* alias for current word */ |
124 _W1 = W + ix; | 125 _W1 = W + ix; |
125 | 126 |
126 /* alias for next word, where the carry goes */ | 127 /* alias for next word, where the carry goes */ |
127 _W = W + ++ix; | 128 _W = W + ++ix; |
128 | 129 |
129 for (; ix <= ((n->used * 2) + 1); ix++) { | 130 for (; ix <= ((n->used * 2) + 1); ix++) { |
130 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); | 131 *_W++ += *_W1++ >> (mp_word)DIGIT_BIT; |
131 } | 132 } |
132 | 133 |
133 /* copy out, A = A/b**n | 134 /* copy out, A = A/b**n |
134 * | 135 * |
135 * The result is A/b**n but instead of converting from an | 136 * The result is A/b**n but instead of converting from an |
136 * array of mp_word to mp_digit than calling mp_rshd | 137 * array of mp_word to mp_digit than calling mp_rshd |
137 * we just copy them in the right order | 138 * we just copy them in the right order |
138 */ | 139 */ |
139 | 140 |
140 /* alias for destination word */ | 141 /* alias for destination word */ |
141 tmpx = x->dp; | 142 tmpx = x->dp; |
142 | 143 |
143 /* alias for shifted double precision result */ | 144 /* alias for shifted double precision result */ |
144 _W = W + n->used; | 145 _W = W + n->used; |
145 | 146 |
146 for (ix = 0; ix < (n->used + 1); ix++) { | 147 for (ix = 0; ix < (n->used + 1); ix++) { |
147 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); | 148 *tmpx++ = *_W++ & (mp_word)MP_MASK; |
148 } | 149 } |
149 | 150 |
150 /* zero oldused digits, if the input a was larger than | 151 /* zero oldused digits, if the input a was larger than |
151 * m->used+1 we'll have to clear the digits | 152 * m->used+1 we'll have to clear the digits |
152 */ | 153 */ |
153 for (; ix < olduse; ix++) { | 154 for (; ix < olduse; ix++) { |
154 *tmpx++ = 0; | 155 *tmpx++ = 0; |
155 } | 156 } |
156 } | 157 } |
157 | 158 |
158 /* set the max used and clamp */ | 159 /* set the max used and clamp */ |
159 x->used = n->used + 1; | 160 x->used = n->used + 1; |
160 mp_clamp (x); | 161 mp_clamp(x); |
161 | 162 |
162 /* if A >= m then A = A - m */ | 163 /* if A >= m then A = A - m */ |
163 if (mp_cmp_mag (x, n) != MP_LT) { | 164 if (mp_cmp_mag(x, n) != MP_LT) { |
164 return s_mp_sub (x, n, x); | 165 return s_mp_sub(x, n, x); |
165 } | 166 } |
166 return MP_OKAY; | 167 return MP_OKAY; |
167 } | 168 } |
168 #endif | 169 #endif |
169 | 170 |
170 /* ref: $Format:%D$ */ | 171 /* ref: HEAD -> master, tag: v1.1.0 */ |
171 /* git commit: $Format:%H$ */ | 172 /* git commit: 08549ad6bc8b0cede0b357a9c341c5c6473a9c55 */ |
172 /* commit time: $Format:%ai$ */ | 173 /* commit time: 2019-01-28 20:32:32 +0100 */ |