Mercurial > dropbear
comparison bn_mp_karatsuba_mul.c @ 282:91fbc376f010 libtommath-orig libtommath-0.35
Import of libtommath 0.35
From ltm-0.35.tar.bz2 SHA1 of 3f193dbae9351e92d02530994fa18236f7fde01c
author | Matt Johnston <matt@ucc.asn.au> |
---|---|
date | Wed, 08 Mar 2006 13:16:18 +0000 |
parents | |
children | 97db060d0ef5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 282:91fbc376f010 |
---|---|
1 #include <tommath.h> | |
2 #ifdef BN_MP_KARATSUBA_MUL_C | |
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis | |
4 * | |
5 * LibTomMath is a library that provides multiple-precision | |
6 * integer arithmetic as well as number theoretic functionality. | |
7 * | |
8 * The library was designed directly after the MPI library by | |
9 * Michael Fromberger but has been written from scratch with | |
10 * additional optimizations in place. | |
11 * | |
12 * The library is free for all purposes without any express | |
13 * guarantee it works. | |
14 * | |
15 * Tom St Denis, [email protected], http://math.libtomcrypt.org | |
16 */ | |
17 | |
18 /* c = |a| * |b| using Karatsuba Multiplication using | |
19 * three half size multiplications | |
20 * | |
21 * Let B represent the radix [e.g. 2**DIGIT_BIT] and | |
22 * let n represent half of the number of digits in | |
23 * the min(a,b) | |
24 * | |
25 * a = a1 * B**n + a0 | |
26 * b = b1 * B**n + b0 | |
27 * | |
28 * Then, a * b => | |
29 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 | |
30 * | |
31 * Note that a1b1 and a0b0 are used twice and only need to be | |
32 * computed once. So in total three half size (half # of | |
33 * digit) multiplications are performed, a0b0, a1b1 and | |
34 * (a1-b1)(a0-b0) | |
35 * | |
36 * Note that a multiplication of half the digits requires | |
37 * 1/4th the number of single precision multiplications so in | |
38 * total after one call 25% of the single precision multiplications | |
39 * are saved. Note also that the call to mp_mul can end up back | |
40 * in this function if the a0, a1, b0, or b1 are above the threshold. | |
41 * This is known as divide-and-conquer and leads to the famous | |
42 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than | |
43 * the standard O(N**2) that the baseline/comba methods use. | |
44 * Generally though the overhead of this method doesn't pay off | |
45 * until a certain size (N ~ 80) is reached. | |
46 */ | |
47 int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) | |
48 { | |
49 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; | |
50 int B, err; | |
51 | |
52 /* default the return code to an error */ | |
53 err = MP_MEM; | |
54 | |
55 /* min # of digits */ | |
56 B = MIN (a->used, b->used); | |
57 | |
58 /* now divide in two */ | |
59 B = B >> 1; | |
60 | |
61 /* init copy all the temps */ | |
62 if (mp_init_size (&x0, B) != MP_OKAY) | |
63 goto ERR; | |
64 if (mp_init_size (&x1, a->used - B) != MP_OKAY) | |
65 goto X0; | |
66 if (mp_init_size (&y0, B) != MP_OKAY) | |
67 goto X1; | |
68 if (mp_init_size (&y1, b->used - B) != MP_OKAY) | |
69 goto Y0; | |
70 | |
71 /* init temps */ | |
72 if (mp_init_size (&t1, B * 2) != MP_OKAY) | |
73 goto Y1; | |
74 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) | |
75 goto T1; | |
76 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) | |
77 goto X0Y0; | |
78 | |
79 /* now shift the digits */ | |
80 x0.used = y0.used = B; | |
81 x1.used = a->used - B; | |
82 y1.used = b->used - B; | |
83 | |
84 { | |
85 register int x; | |
86 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; | |
87 | |
88 /* we copy the digits directly instead of using higher level functions | |
89 * since we also need to shift the digits | |
90 */ | |
91 tmpa = a->dp; | |
92 tmpb = b->dp; | |
93 | |
94 tmpx = x0.dp; | |
95 tmpy = y0.dp; | |
96 for (x = 0; x < B; x++) { | |
97 *tmpx++ = *tmpa++; | |
98 *tmpy++ = *tmpb++; | |
99 } | |
100 | |
101 tmpx = x1.dp; | |
102 for (x = B; x < a->used; x++) { | |
103 *tmpx++ = *tmpa++; | |
104 } | |
105 | |
106 tmpy = y1.dp; | |
107 for (x = B; x < b->used; x++) { | |
108 *tmpy++ = *tmpb++; | |
109 } | |
110 } | |
111 | |
112 /* only need to clamp the lower words since by definition the | |
113 * upper words x1/y1 must have a known number of digits | |
114 */ | |
115 mp_clamp (&x0); | |
116 mp_clamp (&y0); | |
117 | |
118 /* now calc the products x0y0 and x1y1 */ | |
119 /* after this x0 is no longer required, free temp [x0==t2]! */ | |
120 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) | |
121 goto X1Y1; /* x0y0 = x0*y0 */ | |
122 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) | |
123 goto X1Y1; /* x1y1 = x1*y1 */ | |
124 | |
125 /* now calc x1-x0 and y1-y0 */ | |
126 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) | |
127 goto X1Y1; /* t1 = x1 - x0 */ | |
128 if (mp_sub (&y1, &y0, &x0) != MP_OKAY) | |
129 goto X1Y1; /* t2 = y1 - y0 */ | |
130 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) | |
131 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ | |
132 | |
133 /* add x0y0 */ | |
134 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) | |
135 goto X1Y1; /* t2 = x0y0 + x1y1 */ | |
136 if (mp_sub (&x0, &t1, &t1) != MP_OKAY) | |
137 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ | |
138 | |
139 /* shift by B */ | |
140 if (mp_lshd (&t1, B) != MP_OKAY) | |
141 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ | |
142 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) | |
143 goto X1Y1; /* x1y1 = x1y1 << 2*B */ | |
144 | |
145 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) | |
146 goto X1Y1; /* t1 = x0y0 + t1 */ | |
147 if (mp_add (&t1, &x1y1, c) != MP_OKAY) | |
148 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ | |
149 | |
150 /* Algorithm succeeded set the return code to MP_OKAY */ | |
151 err = MP_OKAY; | |
152 | |
153 X1Y1:mp_clear (&x1y1); | |
154 X0Y0:mp_clear (&x0y0); | |
155 T1:mp_clear (&t1); | |
156 Y1:mp_clear (&y1); | |
157 Y0:mp_clear (&y0); | |
158 X1:mp_clear (&x1); | |
159 X0:mp_clear (&x0); | |
160 ERR: | |
161 return err; | |
162 } | |
163 #endif |