1 /**
2 * Comba Multiplication / Squaring
3 * 
4 * Copyright:
5 * (C) 1999-2010,2014 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *      2006 Luca Piccarreta
8 *
9 * License:
10 * Botan is released under the Simplified BSD License (see LICENSE.md)
11 */
12 module botan_math.mp_comba;
13 
14 import botan_math.mp_word;
/*
* Comba 4x4 Squaring
*
* Fills the 8-word output z from the 4-word input x, one output word
* ("column") at a time. Column k collects every pair x[i], x[j] with
* i + j == k and i <= j: pairs with i < j go through word3_muladd_2, the
* diagonal pair i == j goes through word3_muladd (both come from
* botan_math.mp_word; word3_muladd_2 presumably adds the product twice -
* confirm there).
*
* Params:
*   z = output, all 8 words are overwritten
*   x = input, 4 words
*/
void bigint_comba_sqr4(ref word[8] z, const ref word[4] x)
{
	word w2 = 0, w1 = 0, w0 = 0;

	// The numeric suffix spells the accumulator order handed to the helper
	// from botan_math.mp_word: _210 passes (&w2, &w1, &w0), _021 passes
	// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished column
	// is always read from the last-named word, which is then cleared.
	void diag_210(size_t i) { word3_muladd(&w2, &w1, &w0, x.ptr[i], x.ptr[i]); }
	void diag_021(size_t i) { word3_muladd(&w0, &w2, &w1, x.ptr[i], x.ptr[i]); }
	void diag_102(size_t i) { word3_muladd(&w1, &w0, &w2, x.ptr[i], x.ptr[i]); }
	void pair_210(size_t i, size_t j) { word3_muladd_2(&w2, &w1, &w0, x.ptr[i], x.ptr[j]); }
	void pair_021(size_t i, size_t j) { word3_muladd_2(&w0, &w2, &w1, x.ptr[i], x.ptr[j]); }
	void pair_102(size_t i, size_t j) { word3_muladd_2(&w1, &w0, &w2, x.ptr[i], x.ptr[j]); }

	// column 0
	diag_210(0);
	z[0] = w0; w0 = 0;

	// column 1
	pair_021(0, 1);
	z[1] = w1; w1 = 0;

	// column 2
	pair_102(0, 2);
	diag_102(1);
	z[2] = w2; w2 = 0;

	// column 3
	pair_210(0, 3);
	pair_210(1, 2);
	z[3] = w0; w0 = 0;

	// column 4
	pair_021(1, 3);
	diag_021(2);
	z[4] = w1; w1 = 0;

	// column 5
	pair_102(2, 3);
	z[5] = w2; w2 = 0;

	// column 6, plus the final carry word
	diag_210(3);
	z[6] = w0;
	z[7] = w1;
}
48 
/*
* Comba 4x4 Multiplication
*
* Fills the 8-word output z from the 4-word inputs x and y.
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!4 from botan_math.x86_64.mp_comba_mul.
* Otherwise the portable path below runs: output word ("column") k
* accumulates x[i] * y[j] for every i + j == k into a three-word
* accumulator, stores its low word to z[k] and rotates the accumulator.
*
* Params:
*   z = output, all 8 words are overwritten by the portable path
*   x = first operand, 4 words
*   y = second operand, 4 words
*/
void bigint_comba_mul4(ref word[8] z, const ref word[4] x, const ref word[4] y)
{
	version(D_InlineAsm_X86_64) {
		import botan_math.x86_64.mp_comba_mul;
		mixin(mp_bigint_comba_mul!4);
	}
	else
	{
		word w2 = 0, w1 = 0, w0 = 0;

		// One multiply-accumulate step per helper. The suffix lists the
		// accumulator words from top to bottom: in mac_210 w0 is the low
		// word handed to word_madd2 through `carry`, whatever word_madd2
		// leaves in `carry` is added to w1, and a wrap-around of that
		// addition bumps w2.
		void mac_210(size_t i, size_t j) {
			word carry = w0;
			w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w1 += carry;
			w2 += (w1 < carry) ? 1 : 0;
		}

		void mac_021(size_t i, size_t j) {
			word carry = w1;
			w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w2 += carry;
			w0 += (w2 < carry) ? 1 : 0;
		}

		void mac_102(size_t i, size_t j) {
			word carry = w2;
			w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w0 += carry;
			w1 += (w0 < carry) ? 1 : 0;
		}

		// column 0
		mac_210(0, 0);
		z[0] = w0; w0 = 0;

		// column 1
		mac_021(0, 1);
		mac_021(1, 0);
		z[1] = w1; w1 = 0;

		// column 2
		mac_102(0, 2);
		mac_102(1, 1);
		mac_102(2, 0);
		z[2] = w2; w2 = 0;

		// column 3
		mac_210(0, 3);
		mac_210(1, 2);
		mac_210(2, 1);
		mac_210(3, 0);
		z[3] = w0; w0 = 0;

		// column 4
		mac_021(1, 3);
		mac_021(2, 2);
		mac_021(3, 1);
		z[4] = w1; w1 = 0;

		// column 5
		mac_102(2, 3);
		mac_102(3, 2);
		z[5] = w2; w2 = 0;

		// column 6, plus the final carry word
		mac_210(3, 3);
		z[6] = w0;
		z[7] = w1;
	}
}
174 
/*
* Comba 6x6 Squaring
*
* Fills the 12-word output z from the 6-word input x, one output word
* ("column") at a time. Column k collects every pair x[i], x[j] with
* i + j == k and i <= j: pairs with i < j go through word3_muladd_2, the
* diagonal pair i == j goes through word3_muladd (both come from
* botan_math.mp_word; word3_muladd_2 presumably adds the product twice -
* confirm there).
*
* Params:
*   z = output, all 12 words are overwritten
*   x = input, 6 words
*/
void bigint_comba_sqr6(ref word[12] z, const ref word[6] x)
{
	word w2 = 0, w1 = 0, w0 = 0;

	// The numeric suffix spells the accumulator order handed to the helper
	// from botan_math.mp_word: _210 passes (&w2, &w1, &w0), _021 passes
	// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished column
	// is always read from the last-named word, which is then cleared.
	void diag_210(size_t i) { word3_muladd(&w2, &w1, &w0, x.ptr[i], x.ptr[i]); }
	void diag_021(size_t i) { word3_muladd(&w0, &w2, &w1, x.ptr[i], x.ptr[i]); }
	void diag_102(size_t i) { word3_muladd(&w1, &w0, &w2, x.ptr[i], x.ptr[i]); }
	void pair_210(size_t i, size_t j) { word3_muladd_2(&w2, &w1, &w0, x.ptr[i], x.ptr[j]); }
	void pair_021(size_t i, size_t j) { word3_muladd_2(&w0, &w2, &w1, x.ptr[i], x.ptr[j]); }
	void pair_102(size_t i, size_t j) { word3_muladd_2(&w1, &w0, &w2, x.ptr[i], x.ptr[j]); }

	// column 0
	diag_210(0);
	z[0] = w0; w0 = 0;

	// column 1
	pair_021(0, 1);
	z[1] = w1; w1 = 0;

	// column 2
	pair_102(0, 2);
	diag_102(1);
	z[2] = w2; w2 = 0;

	// column 3
	pair_210(0, 3);
	pair_210(1, 2);
	z[3] = w0; w0 = 0;

	// column 4
	pair_021(0, 4);
	pair_021(1, 3);
	diag_021(2);
	z[4] = w1; w1 = 0;

	// column 5
	pair_102(0, 5);
	pair_102(1, 4);
	pair_102(2, 3);
	z[5] = w2; w2 = 0;

	// column 6
	pair_210(1, 5);
	pair_210(2, 4);
	diag_210(3);
	z[6] = w0; w0 = 0;

	// column 7
	pair_021(2, 5);
	pair_021(3, 4);
	z[7] = w1; w1 = 0;

	// column 8
	pair_102(3, 5);
	diag_102(4);
	z[8] = w2; w2 = 0;

	// column 9
	pair_210(4, 5);
	z[9] = w0; w0 = 0;

	// column 10, plus the final carry word
	diag_021(5);
	z[10] = w1;
	z[11] = w2;
}
226 
/*
* Comba 6x6 Multiplication
*
* Fills the 12-word output z from the 6-word inputs x and y.
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!6 from botan_math.x86_64.mp_comba_mul.
* Otherwise the portable path below runs: output word ("column") k
* accumulates x[i] * y[j] for every i + j == k into a three-word
* accumulator, stores its low word to z[k] and rotates the accumulator.
*
* Params:
*   z = output, all 12 words are overwritten by the portable path
*   x = first operand, 6 words
*   y = second operand, 6 words
*/
void bigint_comba_mul6(ref word[12] z, const ref word[6] x, const ref word[6] y)
{
	version(D_InlineAsm_X86_64) {
		import botan_math.x86_64.mp_comba_mul;
		mixin(mp_bigint_comba_mul!6);
	}
	else
	{
		word w2 = 0, w1 = 0, w0 = 0;

		// One multiply-accumulate step per helper. The suffix lists the
		// accumulator words from top to bottom: in mac_210 w0 is the low
		// word handed to word_madd2 through `carry`, whatever word_madd2
		// leaves in `carry` is added to w1, and a wrap-around of that
		// addition bumps w2.
		void mac_210(size_t i, size_t j) {
			word carry = w0;
			w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w1 += carry;
			w2 += (w1 < carry) ? 1 : 0;
		}

		void mac_021(size_t i, size_t j) {
			word carry = w1;
			w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w2 += carry;
			w0 += (w2 < carry) ? 1 : 0;
		}

		void mac_102(size_t i, size_t j) {
			word carry = w2;
			w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w0 += carry;
			w1 += (w0 < carry) ? 1 : 0;
		}

		// column 0
		mac_210(0, 0);
		z[0] = w0; w0 = 0;

		// column 1
		mac_021(0, 1);
		mac_021(1, 0);
		z[1] = w1; w1 = 0;

		// column 2
		mac_102(0, 2);
		mac_102(1, 1);
		mac_102(2, 0);
		z[2] = w2; w2 = 0;

		// column 3
		mac_210(0, 3);
		mac_210(1, 2);
		mac_210(2, 1);
		mac_210(3, 0);
		z[3] = w0; w0 = 0;

		// column 4
		mac_021(0, 4);
		mac_021(1, 3);
		mac_021(2, 2);
		mac_021(3, 1);
		mac_021(4, 0);
		z[4] = w1; w1 = 0;

		// column 5
		mac_102(0, 5);
		mac_102(1, 4);
		mac_102(2, 3);
		mac_102(3, 2);
		mac_102(4, 1);
		mac_102(5, 0);
		z[5] = w2; w2 = 0;

		// column 6
		mac_210(1, 5);
		mac_210(2, 4);
		mac_210(3, 3);
		mac_210(4, 2);
		mac_210(5, 1);
		z[6] = w0; w0 = 0;

		// column 7
		mac_021(2, 5);
		mac_021(3, 4);
		mac_021(4, 3);
		mac_021(5, 2);
		z[7] = w1; w1 = 0;

		// column 8
		mac_102(3, 5);
		mac_102(4, 4);
		mac_102(5, 3);
		z[8] = w2; w2 = 0;

		// column 9
		mac_210(4, 5);
		mac_210(5, 4);
		z[9] = w0; w0 = 0;

		// column 10, plus the final carry word
		mac_021(5, 5);
		z[10] = w1;
		z[11] = w2;
	}
}
481 
/*
* Comba 8x8 Squaring
*
* Fills the 16-word output z from the 8-word input x, one output word
* ("column") at a time. Column k collects every pair x[i], x[j] with
* i + j == k and i <= j: pairs with i < j go through word3_muladd_2, the
* diagonal pair i == j goes through word3_muladd (both come from
* botan_math.mp_word; word3_muladd_2 presumably adds the product twice -
* confirm there).
*
* Params:
*   z = output, all 16 words are overwritten
*   x = input, 8 words
*/
void bigint_comba_sqr8(ref word[16] z, const ref word[8] x)
{
	word w2 = 0, w1 = 0, w0 = 0;

	// The numeric suffix spells the accumulator order handed to the helper
	// from botan_math.mp_word: _210 passes (&w2, &w1, &w0), _021 passes
	// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished column
	// is always read from the last-named word, which is then cleared.
	void diag_210(size_t i) { word3_muladd(&w2, &w1, &w0, x.ptr[i], x.ptr[i]); }
	void diag_021(size_t i) { word3_muladd(&w0, &w2, &w1, x.ptr[i], x.ptr[i]); }
	void diag_102(size_t i) { word3_muladd(&w1, &w0, &w2, x.ptr[i], x.ptr[i]); }
	void pair_210(size_t i, size_t j) { word3_muladd_2(&w2, &w1, &w0, x.ptr[i], x.ptr[j]); }
	void pair_021(size_t i, size_t j) { word3_muladd_2(&w0, &w2, &w1, x.ptr[i], x.ptr[j]); }
	void pair_102(size_t i, size_t j) { word3_muladd_2(&w1, &w0, &w2, x.ptr[i], x.ptr[j]); }

	// column 0
	diag_210(0);
	z[0] = w0; w0 = 0;

	// column 1
	pair_021(0, 1);
	z[1] = w1; w1 = 0;

	// column 2
	pair_102(0, 2);
	diag_102(1);
	z[2] = w2; w2 = 0;

	// column 3
	pair_210(0, 3);
	pair_210(1, 2);
	z[3] = w0; w0 = 0;

	// column 4
	pair_021(0, 4);
	pair_021(1, 3);
	diag_021(2);
	z[4] = w1; w1 = 0;

	// column 5
	pair_102(0, 5);
	pair_102(1, 4);
	pair_102(2, 3);
	z[5] = w2; w2 = 0;

	// column 6
	pair_210(0, 6);
	pair_210(1, 5);
	pair_210(2, 4);
	diag_210(3);
	z[6] = w0; w0 = 0;

	// column 7
	pair_021(0, 7);
	pair_021(1, 6);
	pair_021(2, 5);
	pair_021(3, 4);
	z[7] = w1; w1 = 0;

	// column 8
	pair_102(1, 7);
	pair_102(2, 6);
	pair_102(3, 5);
	diag_102(4);
	z[8] = w2; w2 = 0;

	// column 9
	pair_210(2, 7);
	pair_210(3, 6);
	pair_210(4, 5);
	z[9] = w0; w0 = 0;

	// column 10
	pair_021(3, 7);
	pair_021(4, 6);
	diag_021(5);
	z[10] = w1; w1 = 0;

	// column 11
	pair_102(4, 7);
	pair_102(5, 6);
	z[11] = w2; w2 = 0;

	// column 12
	pair_210(5, 7);
	diag_210(6);
	z[12] = w0; w0 = 0;

	// column 13
	pair_021(6, 7);
	z[13] = w1; w1 = 0;

	// column 14, plus the final carry word
	diag_102(7);
	z[14] = w2;
	z[15] = w0;
}
556 
/*
* Comba 8x8 Multiplication
*
* Fills the 16-word output z from the 8-word inputs x and y.
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!8 from botan_math.x86_64.mp_comba_mul.
* Otherwise the portable path below runs: output word ("column") k
* accumulates x[i] * y[j] for every i + j == k into a three-word
* accumulator, stores its low word to z[k] and rotates the accumulator.
*
* Params:
*   z = output, all 16 words are overwritten by the portable path
*   x = first operand, 8 words
*   y = second operand, 8 words
*/
void bigint_comba_mul8(ref word[16] z, const ref word[8] x, const ref word[8] y)
{
	version(D_InlineAsm_X86_64) {
		import botan_math.x86_64.mp_comba_mul;
		mixin(mp_bigint_comba_mul!8);
	} else
	{
		word w2 = 0, w1 = 0, w0 = 0;

		// One multiply-accumulate step per helper. The suffix lists the
		// accumulator words from top to bottom: in word3_mulladd_210 w0 is
		// the low word handed to word_madd2 through `carry`, whatever
		// word_madd2 leaves in `carry` is added to w1, and a wrap-around
		// of that addition bumps w2.
		//
		// `carry` is a `word`, matching the accumulators it is mixed with
		// and the declaration used by the 4x4 and 6x6 routines; declaring
		// it size_t only works where word and size_t are the same type.
		void word3_mulladd_021(size_t i, size_t j) {
			word carry = w1;
			w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w2 += carry;
			w0 += (w2 < carry) ? 1 : 0;
		}

		void word3_mulladd_102(size_t i, size_t j) {
			word carry = w2;
			w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w0 += carry;
			w1 += (w0 < carry) ? 1 : 0;
		}

		void word3_mulladd_210(size_t i, size_t j) {
			word carry = w0;
			w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
			w1 += carry;
			w2 += (w1 < carry) ? 1 : 0;
		}

		// column 0
		word3_mulladd_210(0, 0);
		z[ 0] = w0; w0 = 0;

		// column 1
		word3_mulladd_021(0, 1);
		word3_mulladd_021(1, 0);
		z[ 1] = w1; w1 = 0;

		// column 2
		word3_mulladd_102(0, 2);
		word3_mulladd_102(1, 1);
		word3_mulladd_102(2, 0);
		z[ 2] = w2; w2 = 0;

		// column 3
		word3_mulladd_210(0, 3);
		word3_mulladd_210(1, 2);
		word3_mulladd_210(2, 1);
		word3_mulladd_210(3, 0);
		z[ 3] = w0; w0 = 0;

		// column 4
		word3_mulladd_021(0, 4);
		word3_mulladd_021(1, 3);
		word3_mulladd_021(2, 2);
		word3_mulladd_021(3, 1);
		word3_mulladd_021(4, 0);
		z[ 4] = w1; w1 = 0;

		// column 5
		word3_mulladd_102(0, 5);
		word3_mulladd_102(1, 4);
		word3_mulladd_102(2, 3);
		word3_mulladd_102(3, 2);
		word3_mulladd_102(4, 1);
		word3_mulladd_102(5, 0);
		z[ 5] = w2; w2 = 0;

		// column 6
		word3_mulladd_210(0, 6);
		word3_mulladd_210(1, 5);
		word3_mulladd_210(2, 4);
		word3_mulladd_210(3, 3);
		word3_mulladd_210(4, 2);
		word3_mulladd_210(5, 1);
		word3_mulladd_210(6, 0);
		z[ 6] = w0; w0 = 0;

		// column 7
		word3_mulladd_021(0, 7);
		word3_mulladd_021(1, 6);
		word3_mulladd_021(2, 5);
		word3_mulladd_021(3, 4);
		word3_mulladd_021(4, 3);
		word3_mulladd_021(5, 2);
		word3_mulladd_021(6, 1);
		word3_mulladd_021(7, 0);
		z[ 7] = w1; w1 = 0;

		// column 8
		word3_mulladd_102(1, 7);
		word3_mulladd_102(2, 6);
		word3_mulladd_102(3, 5);
		word3_mulladd_102(4, 4);
		word3_mulladd_102(5, 3);
		word3_mulladd_102(6, 2);
		word3_mulladd_102(7, 1);
		z[ 8] = w2; w2 = 0;

		// column 9
		word3_mulladd_210(2, 7);
		word3_mulladd_210(3, 6);
		word3_mulladd_210(4, 5);
		word3_mulladd_210(5, 4);
		word3_mulladd_210(6, 3);
		word3_mulladd_210(7, 2);
		z[ 9] = w0; w0 = 0;

		// column 10
		word3_mulladd_021(3, 7);
		word3_mulladd_021(4, 6);
		word3_mulladd_021(5, 5);
		word3_mulladd_021(6, 4);
		word3_mulladd_021(7, 3);
		z[10] = w1; w1 = 0;

		// column 11
		word3_mulladd_102(4, 7);
		word3_mulladd_102(5, 6);
		word3_mulladd_102(6, 5);
		word3_mulladd_102(7, 4);
		z[11] = w2; w2 = 0;

		// column 12
		word3_mulladd_210(5, 7);
		word3_mulladd_210(6, 6);
		word3_mulladd_210(7, 5);
		z[12] = w0; w0 = 0;

		// column 13
		word3_mulladd_021(6, 7);
		word3_mulladd_021(7, 6);
		z[13] = w1; w1 = 0;

		// column 14, plus the final carry word
		word3_mulladd_102(7, 7);
		z[14] = w2;
		z[15] = w0;
	}
}
689 
/*
* Comba 9x9 Squaring
*
* Fills the 18-word output z from the 9-word input x, one output word
* ("column") at a time. Column k collects every pair x[i], x[j] with
* i + j == k and i <= j: pairs with i < j go through word3_muladd_2, the
* diagonal pair i == j goes through word3_muladd (both come from
* botan_math.mp_word; word3_muladd_2 presumably adds the product twice -
* confirm there).
*
* Params:
*   z = output, all 18 words are overwritten
*   x = input, 9 words
*/
void bigint_comba_sqr9(ref word[18] z, const ref word[9] x)
{
	word w2 = 0, w1 = 0, w0 = 0;

	// The numeric suffix spells the accumulator order handed to the helper
	// from botan_math.mp_word: _210 passes (&w2, &w1, &w0), _021 passes
	// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished column
	// is always read from the last-named word, which is then cleared.
	void diag_210(size_t i) { word3_muladd(&w2, &w1, &w0, x.ptr[i], x.ptr[i]); }
	void diag_021(size_t i) { word3_muladd(&w0, &w2, &w1, x.ptr[i], x.ptr[i]); }
	void diag_102(size_t i) { word3_muladd(&w1, &w0, &w2, x.ptr[i], x.ptr[i]); }
	void pair_210(size_t i, size_t j) { word3_muladd_2(&w2, &w1, &w0, x.ptr[i], x.ptr[j]); }
	void pair_021(size_t i, size_t j) { word3_muladd_2(&w0, &w2, &w1, x.ptr[i], x.ptr[j]); }
	void pair_102(size_t i, size_t j) { word3_muladd_2(&w1, &w0, &w2, x.ptr[i], x.ptr[j]); }

	// column 0
	diag_210(0);
	z[0] = w0; w0 = 0;

	// column 1
	pair_021(0, 1);
	z[1] = w1; w1 = 0;

	// column 2
	pair_102(0, 2);
	diag_102(1);
	z[2] = w2; w2 = 0;

	// column 3
	pair_210(0, 3);
	pair_210(1, 2);
	z[3] = w0; w0 = 0;

	// column 4
	pair_021(0, 4);
	pair_021(1, 3);
	diag_021(2);
	z[4] = w1; w1 = 0;

	// column 5
	pair_102(0, 5);
	pair_102(1, 4);
	pair_102(2, 3);
	z[5] = w2; w2 = 0;

	// column 6
	pair_210(0, 6);
	pair_210(1, 5);
	pair_210(2, 4);
	diag_210(3);
	z[6] = w0; w0 = 0;

	// column 7
	pair_021(0, 7);
	pair_021(1, 6);
	pair_021(2, 5);
	pair_021(3, 4);
	z[7] = w1; w1 = 0;

	// column 8
	pair_102(0, 8);
	pair_102(1, 7);
	pair_102(2, 6);
	pair_102(3, 5);
	diag_102(4);
	z[8] = w2; w2 = 0;

	// column 9
	pair_210(1, 8);
	pair_210(2, 7);
	pair_210(3, 6);
	pair_210(4, 5);
	z[9] = w0; w0 = 0;

	// column 10
	pair_021(2, 8);
	pair_021(3, 7);
	pair_021(4, 6);
	diag_021(5);
	z[10] = w1; w1 = 0;

	// column 11
	pair_102(3, 8);
	pair_102(4, 7);
	pair_102(5, 6);
	z[11] = w2; w2 = 0;

	// column 12
	pair_210(4, 8);
	pair_210(5, 7);
	diag_210(6);
	z[12] = w0; w0 = 0;

	// column 13
	pair_021(5, 8);
	pair_021(6, 7);
	z[13] = w1; w1 = 0;

	// column 14
	pair_102(6, 8);
	diag_102(7);
	z[14] = w2; w2 = 0;

	// column 15
	pair_210(7, 8);
	z[15] = w0; w0 = 0;

	// column 16, plus the final carry word
	diag_021(8);
	z[16] = w1;
	z[17] = w2;
}
777 
/*
* Comba 9x9 Multiplication
*
* Fills the 18-word output z from the 9-word inputs x and y.
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!9 from botan_math.x86_64.mp_comba_mul.
* Otherwise the portable path below runs: output word ("column") k feeds
* x[i], y[j] for every i + j == k to word3_muladd (from
* botan_math.mp_word), stores one accumulator word to z[k], clears it and
* rotates the accumulator roles.
*
* Params:
*   z = output, all 18 words are overwritten by the portable path
*   x = first operand, 9 words
*   y = second operand, 9 words
*/
void bigint_comba_mul9(ref word[18] z, const ref word[9] x, const ref word[9] y)
{
	version(D_InlineAsm_X86_64) {
		import botan_math.x86_64.mp_comba_mul;
		mixin(mp_bigint_comba_mul!9);
	} else {
		word w2 = 0, w1 = 0, w0 = 0;

		// The numeric suffix spells the accumulator order handed to
		// word3_muladd: _210 passes (&w2, &w1, &w0), _021 passes
		// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished
		// column is always read from the last-named word.
		void mul_210(size_t i, size_t j) { word3_muladd(&w2, &w1, &w0, x.ptr[i], y.ptr[j]); }
		void mul_021(size_t i, size_t j) { word3_muladd(&w0, &w2, &w1, x.ptr[i], y.ptr[j]); }
		void mul_102(size_t i, size_t j) { word3_muladd(&w1, &w0, &w2, x.ptr[i], y.ptr[j]); }

		// column 0
		mul_210(0, 0);
		z[0] = w0; w0 = 0;

		// column 1
		mul_021(0, 1);
		mul_021(1, 0);
		z[1] = w1; w1 = 0;

		// column 2
		mul_102(0, 2);
		mul_102(1, 1);
		mul_102(2, 0);
		z[2] = w2; w2 = 0;

		// column 3
		mul_210(0, 3);
		mul_210(1, 2);
		mul_210(2, 1);
		mul_210(3, 0);
		z[3] = w0; w0 = 0;

		// column 4
		mul_021(0, 4);
		mul_021(1, 3);
		mul_021(2, 2);
		mul_021(3, 1);
		mul_021(4, 0);
		z[4] = w1; w1 = 0;

		// column 5
		mul_102(0, 5);
		mul_102(1, 4);
		mul_102(2, 3);
		mul_102(3, 2);
		mul_102(4, 1);
		mul_102(5, 0);
		z[5] = w2; w2 = 0;

		// column 6
		mul_210(0, 6);
		mul_210(1, 5);
		mul_210(2, 4);
		mul_210(3, 3);
		mul_210(4, 2);
		mul_210(5, 1);
		mul_210(6, 0);
		z[6] = w0; w0 = 0;

		// column 7
		mul_021(0, 7);
		mul_021(1, 6);
		mul_021(2, 5);
		mul_021(3, 4);
		mul_021(4, 3);
		mul_021(5, 2);
		mul_021(6, 1);
		mul_021(7, 0);
		z[7] = w1; w1 = 0;

		// column 8
		mul_102(0, 8);
		mul_102(1, 7);
		mul_102(2, 6);
		mul_102(3, 5);
		mul_102(4, 4);
		mul_102(5, 3);
		mul_102(6, 2);
		mul_102(7, 1);
		mul_102(8, 0);
		z[8] = w2; w2 = 0;

		// column 9
		mul_210(1, 8);
		mul_210(2, 7);
		mul_210(3, 6);
		mul_210(4, 5);
		mul_210(5, 4);
		mul_210(6, 3);
		mul_210(7, 2);
		mul_210(8, 1);
		z[9] = w0; w0 = 0;

		// column 10
		mul_021(2, 8);
		mul_021(3, 7);
		mul_021(4, 6);
		mul_021(5, 5);
		mul_021(6, 4);
		mul_021(7, 3);
		mul_021(8, 2);
		z[10] = w1; w1 = 0;

		// column 11
		mul_102(3, 8);
		mul_102(4, 7);
		mul_102(5, 6);
		mul_102(6, 5);
		mul_102(7, 4);
		mul_102(8, 3);
		z[11] = w2; w2 = 0;

		// column 12
		mul_210(4, 8);
		mul_210(5, 7);
		mul_210(6, 6);
		mul_210(7, 5);
		mul_210(8, 4);
		z[12] = w0; w0 = 0;

		// column 13
		mul_021(5, 8);
		mul_021(6, 7);
		mul_021(7, 6);
		mul_021(8, 5);
		z[13] = w1; w1 = 0;

		// column 14
		mul_102(6, 8);
		mul_102(7, 7);
		mul_102(8, 6);
		z[14] = w2; w2 = 0;

		// column 15
		mul_210(7, 8);
		mul_210(8, 7);
		z[15] = w0; w0 = 0;

		// column 16, plus the final carry word
		mul_021(8, 8);
		z[16] = w1;
		z[17] = w2;
	}
}
906 
/*
* Comba 16x16 Squaring
*
* Fills the 32-word output z from the 16-word input x, one output word
* ("column") at a time. Column k collects every pair x[i], x[j] with
* i + j == k and i <= j: pairs with i < j go through word3_muladd_2, the
* diagonal pair i == j goes through word3_muladd (both come from
* botan_math.mp_word; word3_muladd_2 presumably adds the product twice -
* confirm there).
*
* Params:
*   z = output, all 32 words are overwritten
*   x = input, 16 words
*/
void bigint_comba_sqr16(ref word[32] z, const ref word[16] x)
{
	word w2 = 0, w1 = 0, w0 = 0;

	// The numeric suffix spells the accumulator order handed to the helper
	// from botan_math.mp_word: _210 passes (&w2, &w1, &w0), _021 passes
	// (&w0, &w2, &w1) and _102 passes (&w1, &w0, &w2). The finished column
	// is always read from the last-named word, which is then cleared.
	void diag_210(size_t i) { word3_muladd(&w2, &w1, &w0, x.ptr[i], x.ptr[i]); }
	void diag_021(size_t i) { word3_muladd(&w0, &w2, &w1, x.ptr[i], x.ptr[i]); }
	void diag_102(size_t i) { word3_muladd(&w1, &w0, &w2, x.ptr[i], x.ptr[i]); }
	void pair_210(size_t i, size_t j) { word3_muladd_2(&w2, &w1, &w0, x.ptr[i], x.ptr[j]); }
	void pair_021(size_t i, size_t j) { word3_muladd_2(&w0, &w2, &w1, x.ptr[i], x.ptr[j]); }
	void pair_102(size_t i, size_t j) { word3_muladd_2(&w1, &w0, &w2, x.ptr[i], x.ptr[j]); }

	// column 0
	diag_210(0);
	z[0] = w0; w0 = 0;

	// column 1
	pair_021(0, 1);
	z[1] = w1; w1 = 0;

	// column 2
	pair_102(0, 2);
	diag_102(1);
	z[2] = w2; w2 = 0;

	// column 3
	pair_210(0, 3);
	pair_210(1, 2);
	z[3] = w0; w0 = 0;

	// column 4
	pair_021(0, 4);
	pair_021(1, 3);
	diag_021(2);
	z[4] = w1; w1 = 0;

	// column 5
	pair_102(0, 5);
	pair_102(1, 4);
	pair_102(2, 3);
	z[5] = w2; w2 = 0;

	// column 6
	pair_210(0, 6);
	pair_210(1, 5);
	pair_210(2, 4);
	diag_210(3);
	z[6] = w0; w0 = 0;

	// column 7
	pair_021(0, 7);
	pair_021(1, 6);
	pair_021(2, 5);
	pair_021(3, 4);
	z[7] = w1; w1 = 0;

	// column 8
	pair_102(0, 8);
	pair_102(1, 7);
	pair_102(2, 6);
	pair_102(3, 5);
	diag_102(4);
	z[8] = w2; w2 = 0;

	// column 9
	pair_210(0, 9);
	pair_210(1, 8);
	pair_210(2, 7);
	pair_210(3, 6);
	pair_210(4, 5);
	z[9] = w0; w0 = 0;

	// column 10
	pair_021(0, 10);
	pair_021(1, 9);
	pair_021(2, 8);
	pair_021(3, 7);
	pair_021(4, 6);
	diag_021(5);
	z[10] = w1; w1 = 0;

	// column 11
	pair_102(0, 11);
	pair_102(1, 10);
	pair_102(2, 9);
	pair_102(3, 8);
	pair_102(4, 7);
	pair_102(5, 6);
	z[11] = w2; w2 = 0;

	// column 12
	pair_210(0, 12);
	pair_210(1, 11);
	pair_210(2, 10);
	pair_210(3, 9);
	pair_210(4, 8);
	pair_210(5, 7);
	diag_210(6);
	z[12] = w0; w0 = 0;

	// column 13
	pair_021(0, 13);
	pair_021(1, 12);
	pair_021(2, 11);
	pair_021(3, 10);
	pair_021(4, 9);
	pair_021(5, 8);
	pair_021(6, 7);
	z[13] = w1; w1 = 0;

	// column 14
	pair_102(0, 14);
	pair_102(1, 13);
	pair_102(2, 12);
	pair_102(3, 11);
	pair_102(4, 10);
	pair_102(5, 9);
	pair_102(6, 8);
	diag_102(7);
	z[14] = w2; w2 = 0;

	// column 15
	pair_210(0, 15);
	pair_210(1, 14);
	pair_210(2, 13);
	pair_210(3, 12);
	pair_210(4, 11);
	pair_210(5, 10);
	pair_210(6, 9);
	pair_210(7, 8);
	z[15] = w0; w0 = 0;

	// column 16
	pair_021(1, 15);
	pair_021(2, 14);
	pair_021(3, 13);
	pair_021(4, 12);
	pair_021(5, 11);
	pair_021(6, 10);
	pair_021(7, 9);
	diag_021(8);
	z[16] = w1; w1 = 0;

	// column 17
	pair_102(2, 15);
	pair_102(3, 14);
	pair_102(4, 13);
	pair_102(5, 12);
	pair_102(6, 11);
	pair_102(7, 10);
	pair_102(8, 9);
	z[17] = w2; w2 = 0;

	// column 18
	pair_210(3, 15);
	pair_210(4, 14);
	pair_210(5, 13);
	pair_210(6, 12);
	pair_210(7, 11);
	pair_210(8, 10);
	diag_210(9);
	z[18] = w0; w0 = 0;

	// column 19
	pair_021(4, 15);
	pair_021(5, 14);
	pair_021(6, 13);
	pair_021(7, 12);
	pair_021(8, 11);
	pair_021(9, 10);
	z[19] = w1; w1 = 0;

	// column 20
	pair_102(5, 15);
	pair_102(6, 14);
	pair_102(7, 13);
	pair_102(8, 12);
	pair_102(9, 11);
	diag_102(10);
	z[20] = w2; w2 = 0;

	// column 21
	pair_210(6, 15);
	pair_210(7, 14);
	pair_210(8, 13);
	pair_210(9, 12);
	pair_210(10, 11);
	z[21] = w0; w0 = 0;

	// column 22
	pair_021(7, 15);
	pair_021(8, 14);
	pair_021(9, 13);
	pair_021(10, 12);
	diag_021(11);
	z[22] = w1; w1 = 0;

	// column 23
	pair_102(8, 15);
	pair_102(9, 14);
	pair_102(10, 13);
	pair_102(11, 12);
	z[23] = w2; w2 = 0;

	// column 24
	pair_210(9, 15);
	pair_210(10, 14);
	pair_210(11, 13);
	diag_210(12);
	z[24] = w0; w0 = 0;

	// column 25
	pair_021(10, 15);
	pair_021(11, 14);
	pair_021(12, 13);
	z[25] = w1; w1 = 0;

	// column 26
	pair_102(11, 15);
	pair_102(12, 14);
	diag_102(13);
	z[26] = w2; w2 = 0;

	// column 27
	pair_210(12, 15);
	pair_210(13, 14);
	z[27] = w0; w0 = 0;

	// column 28
	pair_021(13, 15);
	diag_021(14);
	z[28] = w1; w1 = 0;

	// column 29
	pair_102(14, 15);
	z[29] = w2; w2 = 0;

	// column 30, plus the final carry word
	diag_210(15);
	z[30] = w0;
	z[31] = w1;
}
1113 
/*
* Comba 16x16 Multiplication
*
* Fills the 32-word output z from the two 16-word inputs x and y.
* The work is organised by output column: for column c, every pair
* x[i], y[c - i] that lies inside both operands is handed to
* word3_muladd together with a three-word accumulator, and the lowest
* accumulator word is then stored to z[c].
*
* Params:
*   z = receives all 32 output words (z[0] is written first)
*   x = first 16-word operand (read only)
*   y = second 16-word operand (read only)
*/
void bigint_comba_mul16(ref word[32] z, const ref word[16] x, const ref word[16] y)
{
	version(D_InlineAsm_X86_64) {
		// Implementation is supplied as a string mixin by the x86_64 module.
		import botan_math.x86_64.mp_comba_mul;
		mixin(mp_bigint_comba_mul!16);
	}
	else {
		// Three-word running accumulator; acc_lo is the word emitted next.
		word acc_hi = 0, acc_mid = 0, acc_lo = 0;

		// Both loops are expanded at compile time, so the generated code is
		// still a straight-line sequence of 256 word3_muladd calls using
		// constant indices, in the same order as a hand-unrolled listing.
		static foreach (col; 0 .. 31)
		{{
			// Range of x indices whose partner y[col - i] is inside y.
			enum first = (col > 15) ? col - 15 : 0;
			enum last = (col > 15) ? 15 : col;

			static foreach (i; first .. last + 1)
			{
				word3_muladd(&acc_hi, &acc_mid, &acc_lo, x[i], y[col - i]);
			}

			// Emit the finished column and slide the accumulator down one
			// word, clearing the top word for the next column.
			z[col] = acc_lo;
			acc_lo = acc_mid;
			acc_mid = acc_hi;
			acc_hi = 0;
		}}

		// What is left in the lowest accumulator word after column 30 is
		// the most significant output word.
		z[31] = acc_lo;
	}
}