monotone

monotone Mtn Source Tree

Root/botan/mp_comba.cpp

/*************************************************
* Comba Multiplication and Squaring Source File  *
* (C) 1999-2007 The Botan Project                *
*************************************************/
5
6#include <botan/mp_core.h>
7#include <botan/mp_asmi.h>
8
9namespace Botan {
10
11extern "C" {
12
13/*************************************************
14* Comba 4x4 Multiplication *
15*************************************************/
16void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
17 {
18 word w2 = 0, w1 = 0, w0 = 0;
19
20 word3_muladd(&w2, &w1, &w0, x[0], y[0]);
21 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
22
23 word3_muladd(&w2, &w1, &w0, x[0], y[1]);
24 word3_muladd(&w2, &w1, &w0, x[1], y[0]);
25 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
26
27 word3_muladd(&w2, &w1, &w0, x[0], y[2]);
28 word3_muladd(&w2, &w1, &w0, x[1], y[1]);
29 word3_muladd(&w2, &w1, &w0, x[2], y[0]);
30 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
31
32 word3_muladd(&w2, &w1, &w0, x[0], y[3]);
33 word3_muladd(&w2, &w1, &w0, x[1], y[2]);
34 word3_muladd(&w2, &w1, &w0, x[2], y[1]);
35 word3_muladd(&w2, &w1, &w0, x[3], y[0]);
36 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
37
38 word3_muladd(&w2, &w1, &w0, x[1], y[3]);
39 word3_muladd(&w2, &w1, &w0, x[2], y[2]);
40 word3_muladd(&w2, &w1, &w0, x[3], y[1]);
41 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
42
43 word3_muladd(&w2, &w1, &w0, x[2], y[3]);
44 word3_muladd(&w2, &w1, &w0, x[3], y[2]);
45 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
46
47 word3_muladd(&w2, &w1, &w0, x[3], y[3]);
48 z[6] = w0;
49 z[7] = w1;
50 }
51
52/*************************************************
53* Comba 6x6 Multiplication *
54*************************************************/
55void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
56 {
57 word w2 = 0, w1 = 0, w0 = 0;
58
59 word3_muladd(&w2, &w1, &w0, x[0], y[0]);
60 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
61
62 word3_muladd(&w2, &w1, &w0, x[0], y[1]);
63 word3_muladd(&w2, &w1, &w0, x[1], y[0]);
64 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
65
66 word3_muladd(&w2, &w1, &w0, x[0], y[2]);
67 word3_muladd(&w2, &w1, &w0, x[1], y[1]);
68 word3_muladd(&w2, &w1, &w0, x[2], y[0]);
69 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
70
71 word3_muladd(&w2, &w1, &w0, x[0], y[3]);
72 word3_muladd(&w2, &w1, &w0, x[1], y[2]);
73 word3_muladd(&w2, &w1, &w0, x[2], y[1]);
74 word3_muladd(&w2, &w1, &w0, x[3], y[0]);
75 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
76
77 word3_muladd(&w2, &w1, &w0, x[0], y[4]);
78 word3_muladd(&w2, &w1, &w0, x[1], y[3]);
79 word3_muladd(&w2, &w1, &w0, x[2], y[2]);
80 word3_muladd(&w2, &w1, &w0, x[3], y[1]);
81 word3_muladd(&w2, &w1, &w0, x[4], y[0]);
82 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
83
84 word3_muladd(&w2, &w1, &w0, x[0], y[5]);
85 word3_muladd(&w2, &w1, &w0, x[1], y[4]);
86 word3_muladd(&w2, &w1, &w0, x[2], y[3]);
87 word3_muladd(&w2, &w1, &w0, x[3], y[2]);
88 word3_muladd(&w2, &w1, &w0, x[4], y[1]);
89 word3_muladd(&w2, &w1, &w0, x[5], y[0]);
90 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
91
92 word3_muladd(&w2, &w1, &w0, x[1], y[5]);
93 word3_muladd(&w2, &w1, &w0, x[2], y[4]);
94 word3_muladd(&w2, &w1, &w0, x[3], y[3]);
95 word3_muladd(&w2, &w1, &w0, x[4], y[2]);
96 word3_muladd(&w2, &w1, &w0, x[5], y[1]);
97 z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
98
99 word3_muladd(&w2, &w1, &w0, x[2], y[5]);
100 word3_muladd(&w2, &w1, &w0, x[3], y[4]);
101 word3_muladd(&w2, &w1, &w0, x[4], y[3]);
102 word3_muladd(&w2, &w1, &w0, x[5], y[2]);
103 z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
104
105 word3_muladd(&w2, &w1, &w0, x[3], y[5]);
106 word3_muladd(&w2, &w1, &w0, x[4], y[4]);
107 word3_muladd(&w2, &w1, &w0, x[5], y[3]);
108 z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
109
110 word3_muladd(&w2, &w1, &w0, x[4], y[5]);
111 word3_muladd(&w2, &w1, &w0, x[5], y[4]);
112 z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
113
114 word3_muladd(&w2, &w1, &w0, x[5], y[5]);
115 z[10] = w0;
116 z[11] = w1;
117 }
118
119/*************************************************
120* Comba 8x8 Multiplication *
121*************************************************/
122void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
123 {
124 word w2 = 0, w1 = 0, w0 = 0;
125
126 word3_muladd(&w2, &w1, &w0, x[0], y[0]);
127 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
128
129 word3_muladd(&w2, &w1, &w0, x[0], y[1]);
130 word3_muladd(&w2, &w1, &w0, x[1], y[0]);
131 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
132
133 word3_muladd(&w2, &w1, &w0, x[0], y[2]);
134 word3_muladd(&w2, &w1, &w0, x[1], y[1]);
135 word3_muladd(&w2, &w1, &w0, x[2], y[0]);
136 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
137
138 word3_muladd(&w2, &w1, &w0, x[0], y[3]);
139 word3_muladd(&w2, &w1, &w0, x[1], y[2]);
140 word3_muladd(&w2, &w1, &w0, x[2], y[1]);
141 word3_muladd(&w2, &w1, &w0, x[3], y[0]);
142 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
143
144 word3_muladd(&w2, &w1, &w0, x[0], y[4]);
145 word3_muladd(&w2, &w1, &w0, x[1], y[3]);
146 word3_muladd(&w2, &w1, &w0, x[2], y[2]);
147 word3_muladd(&w2, &w1, &w0, x[3], y[1]);
148 word3_muladd(&w2, &w1, &w0, x[4], y[0]);
149 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
150
151 word3_muladd(&w2, &w1, &w0, x[0], y[5]);
152 word3_muladd(&w2, &w1, &w0, x[1], y[4]);
153 word3_muladd(&w2, &w1, &w0, x[2], y[3]);
154 word3_muladd(&w2, &w1, &w0, x[3], y[2]);
155 word3_muladd(&w2, &w1, &w0, x[4], y[1]);
156 word3_muladd(&w2, &w1, &w0, x[5], y[0]);
157 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
158
159 word3_muladd(&w2, &w1, &w0, x[0], y[6]);
160 word3_muladd(&w2, &w1, &w0, x[1], y[5]);
161 word3_muladd(&w2, &w1, &w0, x[2], y[4]);
162 word3_muladd(&w2, &w1, &w0, x[3], y[3]);
163 word3_muladd(&w2, &w1, &w0, x[4], y[2]);
164 word3_muladd(&w2, &w1, &w0, x[5], y[1]);
165 word3_muladd(&w2, &w1, &w0, x[6], y[0]);
166 z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
167
168 word3_muladd(&w2, &w1, &w0, x[0], y[7]);
169 word3_muladd(&w2, &w1, &w0, x[1], y[6]);
170 word3_muladd(&w2, &w1, &w0, x[2], y[5]);
171 word3_muladd(&w2, &w1, &w0, x[3], y[4]);
172 word3_muladd(&w2, &w1, &w0, x[4], y[3]);
173 word3_muladd(&w2, &w1, &w0, x[5], y[2]);
174 word3_muladd(&w2, &w1, &w0, x[6], y[1]);
175 word3_muladd(&w2, &w1, &w0, x[7], y[0]);
176 z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
177
178 word3_muladd(&w2, &w1, &w0, x[1], y[7]);
179 word3_muladd(&w2, &w1, &w0, x[2], y[6]);
180 word3_muladd(&w2, &w1, &w0, x[3], y[5]);
181 word3_muladd(&w2, &w1, &w0, x[4], y[4]);
182 word3_muladd(&w2, &w1, &w0, x[5], y[3]);
183 word3_muladd(&w2, &w1, &w0, x[6], y[2]);
184 word3_muladd(&w2, &w1, &w0, x[7], y[1]);
185 z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
186
187 word3_muladd(&w2, &w1, &w0, x[2], y[7]);
188 word3_muladd(&w2, &w1, &w0, x[3], y[6]);
189 word3_muladd(&w2, &w1, &w0, x[4], y[5]);
190 word3_muladd(&w2, &w1, &w0, x[5], y[4]);
191 word3_muladd(&w2, &w1, &w0, x[6], y[3]);
192 word3_muladd(&w2, &w1, &w0, x[7], y[2]);
193 z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
194
195 word3_muladd(&w2, &w1, &w0, x[3], y[7]);
196 word3_muladd(&w2, &w1, &w0, x[4], y[6]);
197 word3_muladd(&w2, &w1, &w0, x[5], y[5]);
198 word3_muladd(&w2, &w1, &w0, x[6], y[4]);
199 word3_muladd(&w2, &w1, &w0, x[7], y[3]);
200 z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
201
202 word3_muladd(&w2, &w1, &w0, x[4], y[7]);
203 word3_muladd(&w2, &w1, &w0, x[5], y[6]);
204 word3_muladd(&w2, &w1, &w0, x[6], y[5]);
205 word3_muladd(&w2, &w1, &w0, x[7], y[4]);
206 z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
207
208 word3_muladd(&w2, &w1, &w0, x[5], y[7]);
209 word3_muladd(&w2, &w1, &w0, x[6], y[6]);
210 word3_muladd(&w2, &w1, &w0, x[7], y[5]);
211 z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
212
213 word3_muladd(&w2, &w1, &w0, x[6], y[7]);
214 word3_muladd(&w2, &w1, &w0, x[7], y[6]);
215 z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
216
217 word3_muladd(&w2, &w1, &w0, x[7], y[7]);
218 z[14] = w0;
219 z[15] = w1;
220 }
221
222/*************************************************
223* Comba 4x4 Squaring *
224*************************************************/
225void bigint_comba_sqr4(word z[8], const word x[4])
226 {
227 word w2 = 0, w1 = 0, w0 = 0;
228
229 word3_muladd(&w2, &w1, &w0, x[0], x[0]);
230 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
231
232 word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
233 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
234
235 word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
236 word3_muladd(&w2, &w1, &w0, x[1], x[1]);
237 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
238
239 word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
240 word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
241 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
242
243 word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
244 word3_muladd(&w2, &w1, &w0, x[2], x[2]);
245 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
246
247 word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
248 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
249
250 word3_muladd(&w2, &w1, &w0, x[3], x[3]);
251 z[6] = w0;
252 z[7] = w1;
253 }
254
255/*************************************************
256* Comba 6x6 Squaring *
257*************************************************/
258void bigint_comba_sqr6(word z[12], const word x[6])
259 {
260 word w2 = 0, w1 = 0, w0 = 0;
261
262 word3_muladd(&w2, &w1, &w0, x[0], x[0]);
263 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
264
265 word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
266 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
267
268 word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
269 word3_muladd(&w2, &w1, &w0, x[1], x[1]);
270 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
271
272 word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
273 word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
274 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
275
276 word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
277 word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
278 word3_muladd(&w2, &w1, &w0, x[2], x[2]);
279 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
280
281 word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
282 word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
283 word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
284 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
285
286 word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
287 word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
288 word3_muladd(&w2, &w1, &w0, x[3], x[3]);
289 z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
290
291 word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
292 word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
293 z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
294
295 word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
296 word3_muladd(&w2, &w1, &w0, x[4], x[4]);
297 z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
298
299 word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
300 z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
301
302 word3_muladd(&w2, &w1, &w0, x[5], x[5]);
303 z[10] = w0;
304 z[11] = w1;
305 }
306
307/*************************************************
308* Comba 8x8 Squaring *
309*************************************************/
310void bigint_comba_sqr8(word z[16], const word x[8])
311 {
312 word w2 = 0, w1 = 0, w0 = 0;
313
314 word3_muladd(&w2, &w1, &w0, x[0], x[0]);
315 z[0] = w0; w0 = w1; w1 = w2; w2 = 0;
316
317 word3_muladd_2(&w2, &w1, &w0, x[0], x[1]);
318 z[1] = w0; w0 = w1; w1 = w2; w2 = 0;
319
320 word3_muladd_2(&w2, &w1, &w0, x[0], x[2]);
321 word3_muladd(&w2, &w1, &w0, x[1], x[1]);
322 z[2] = w0; w0 = w1; w1 = w2; w2 = 0;
323
324 word3_muladd_2(&w2, &w1, &w0, x[0], x[3]);
325 word3_muladd_2(&w2, &w1, &w0, x[1], x[2]);
326 z[3] = w0; w0 = w1; w1 = w2; w2 = 0;
327
328 word3_muladd_2(&w2, &w1, &w0, x[0], x[4]);
329 word3_muladd_2(&w2, &w1, &w0, x[1], x[3]);
330 word3_muladd(&w2, &w1, &w0, x[2], x[2]);
331 z[4] = w0; w0 = w1; w1 = w2; w2 = 0;
332
333 word3_muladd_2(&w2, &w1, &w0, x[0], x[5]);
334 word3_muladd_2(&w2, &w1, &w0, x[1], x[4]);
335 word3_muladd_2(&w2, &w1, &w0, x[2], x[3]);
336 z[5] = w0; w0 = w1; w1 = w2; w2 = 0;
337
338 word3_muladd_2(&w2, &w1, &w0, x[0], x[6]);
339 word3_muladd_2(&w2, &w1, &w0, x[1], x[5]);
340 word3_muladd_2(&w2, &w1, &w0, x[2], x[4]);
341 word3_muladd(&w2, &w1, &w0, x[3], x[3]);
342 z[6] = w0; w0 = w1; w1 = w2; w2 = 0;
343
344 word3_muladd_2(&w2, &w1, &w0, x[0], x[7]);
345 word3_muladd_2(&w2, &w1, &w0, x[1], x[6]);
346 word3_muladd_2(&w2, &w1, &w0, x[2], x[5]);
347 word3_muladd_2(&w2, &w1, &w0, x[3], x[4]);
348 z[7] = w0; w0 = w1; w1 = w2; w2 = 0;
349
350 word3_muladd_2(&w2, &w1, &w0, x[1], x[7]);
351 word3_muladd_2(&w2, &w1, &w0, x[2], x[6]);
352 word3_muladd_2(&w2, &w1, &w0, x[3], x[5]);
353 word3_muladd(&w2, &w1, &w0, x[4], x[4]);
354 z[8] = w0; w0 = w1; w1 = w2; w2 = 0;
355
356 word3_muladd_2(&w2, &w1, &w0, x[2], x[7]);
357 word3_muladd_2(&w2, &w1, &w0, x[3], x[6]);
358 word3_muladd_2(&w2, &w1, &w0, x[4], x[5]);
359 z[9] = w0; w0 = w1; w1 = w2; w2 = 0;
360
361 word3_muladd_2(&w2, &w1, &w0, x[3], x[7]);
362 word3_muladd_2(&w2, &w1, &w0, x[4], x[6]);
363 word3_muladd(&w2, &w1, &w0, x[5], x[5]);
364 z[10] = w0; w0 = w1; w1 = w2; w2 = 0;
365
366 word3_muladd_2(&w2, &w1, &w0, x[4], x[7]);
367 word3_muladd_2(&w2, &w1, &w0, x[5], x[6]);
368 z[11] = w0; w0 = w1; w1 = w2; w2 = 0;
369
370 word3_muladd_2(&w2, &w1, &w0, x[5], x[7]);
371 word3_muladd(&w2, &w1, &w0, x[6], x[6]);
372 z[12] = w0; w0 = w1; w1 = w2; w2 = 0;
373
374 word3_muladd_2(&w2, &w1, &w0, x[6], x[7]);
375 z[13] = w0; w0 = w1; w1 = w2; w2 = 0;
376
377 word3_muladd(&w2, &w1, &w0, x[7], x[7]);
378 z[14] = w0;
379 z[15] = w1;
380 }
381
382}
383
384}

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status