1 /**
2 * Comba word operations
3 * 
4 * Copyright:
5 * (C) 1999-2010,2014 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *      2006 Luca Piccarreta
8 *
9 * License:
10 * Botan is released under the Simplified BSD License (see LICENSE.md)
11 */
12 module botan_math.mp_word;
13 import botan_math.mul128;
14 public import botan_math.mp_types;
/*
* Word Multiply/Add
*
* Forms the double-width value a * b + *c, stores its upper word back
* through c and returns its lower word.
*/
word word_madd2(word a, word b, word* c)
{
	static if (BOTAN_HAS_MP_DWORD) {
		// A double-width integer type exists: one expression does it all.
		const dword wide = cast(dword)(a) * b + *c;
		*c = cast(word)(wide >> BOTAN_MP_WORD_BITS);
		return cast(word)(wide);
	} else version(D_InlineAsm_X86_64) {
		// The by-value arguments are reached through pointers; the low
		// word of the result is written back into `a` and returned.
		word* pa = &a;
		word* pb = &b;
		asm pure nothrow @nogc {
			mov R8, pa;
			mov R9, pb;
			mov RCX, c;

			mov RAX, [R8];
			mov RBX, [R9];
			mul RBX;           // RDX:RAX = a * b
			add RAX, [RCX];    // low half += *c
			adc RDX, 0;        // carry into the high half
			mov [RCX], RDX;    // *c = high word
			mov [R8], RAX;     // a  = low word
		}
		return a;
	} else {
		static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

		word[2] prod;
		mul64x64_128(a, b, prod);

		const word addend = *c;
		prod[0] += addend;
		prod[1] += (prod[0] < addend); // carry out of the low word

		*c = prod[1];
		return prod[0];
	}
}
58 
/*
* Word Multiply/Add
*
* Forms the double-width value a * b + c + *d, stores its upper word back
* through d and returns its lower word.
*/
word word_madd3(word a, word b, word c, word* d)
{
	static if (BOTAN_HAS_MP_DWORD) {
		// A double-width integer type exists: one expression does it all.
		const dword wide = cast(dword)(a) * b + c + *d;
		*d = cast(word)(wide >> BOTAN_MP_WORD_BITS);
		return cast(word)(wide);
	} else version(D_InlineAsm_X86_64) {
		// The by-value arguments are reached through pointers; the low
		// word of the result is written back into `a` and returned.
		word* pa = &a;
		word* pb = &b;
		word* pc = &c;
		asm pure nothrow @nogc {
			mov R8, pa;
			mov R9, pb;
			mov R10, pc;

			mov RAX, [R8];
			mov RBX, [R9];
			mul RBX;           // RDX:RAX = a * b
			mov RBX, d;
			add RAX, [R10];    // low half += c
			adc RDX, 0;
			add RAX, [RBX];    // low half += *d
			adc RDX, 0;
			mov [RBX], RDX;    // *d = high word
			mov [R8], RAX;     // a  = low word
		}
		return a;
	} else {
		static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

		word[2] prod;
		mul64x64_128(a, b, prod);

		// First addend: c
		prod[0] += c;
		prod[1] += (prod[0] < c); // carry out of the low word

		// Second addend: the incoming *d
		const word extra = *d;
		prod[0] += extra;
		prod[1] += (prod[0] < extra); // carry out of the low word

		*d = prod[1];
		return prod[0];
	}
}
109 
110 
/*
* Word Addition
*
* Returns x + y + *carry (truncated to one word) and replaces *carry
* with the carry out of that addition.
*/
word word_add(word x, word y, word* carry)
{
	const word carry_in = *carry;

	const word partial = x + y;
	const word wrapped_xy = (partial < x);      // x + y overflowed

	const word total = partial + carry_in;
	*carry = wrapped_xy | (total < carry_in);   // or adding the carry overflowed

	return total;
}
122 
/*
* Eight Word Block Addition, Two Argument
*
* Adds the eight words of y and the incoming carry into x in place and
* returns the carry out of the top word.
*/
word word8_add2(ref word[8] x, const ref word[8] y, word carry)
{
	version (D_InlineAsm_X86_64) {
		word* dst = x.ptr;
		word* src = cast(word*)y.ptr;
		word* cptr = &carry;

		asm pure nothrow @nogc {
			mov RDI,dst;
			mov RSI,src;
			mov RCX,cptr;

			// 0 - carry borrows exactly when carry is non-zero,
			// which seeds CF for the adc chain below.
			xor RAX,RAX;
			sub RAX,[RCX];

			mov RAX,[RSI];
			adc [RDI],RAX;
			mov RAX,[RSI+8];
			adc [RDI+8],RAX;
			mov RAX,[RSI+16];
			adc [RDI+16],RAX;
			mov RAX,[RSI+24];
			adc [RDI+24],RAX;
			mov RAX,[RSI+32];
			adc [RDI+32],RAX;
			mov RAX,[RSI+40];
			adc [RDI+40],RAX;
			mov RAX,[RSI+48];
			adc [RDI+48],RAX;
			mov RAX,[RSI+56];
			adc [RDI+56],RAX;

			// Turn the final CF into 0 or 1.
			sbb RAX,RAX;
			neg RAX;
			mov carry, RAX;
		}
		return carry;
	} else version (D_InlineAsm_X86) {
		word* dst = x.ptr;
		word* src = cast(word*)y.ptr;
		word* cptr = &carry;

		asm pure nothrow @nogc {
			mov EDI,dst;
			mov ESI,src;
			mov ECX,cptr;

			// 0 - carry borrows exactly when carry is non-zero,
			// which seeds CF for the adc chain below.
			xor EAX,EAX;
			sub EAX,[ECX];

			mov EAX,[ESI];
			adc [EDI],EAX;
			mov EAX,[ESI+4];
			adc [EDI+4],EAX;
			mov EAX,[ESI+8];
			adc [EDI+8],EAX;
			mov EAX,[ESI+12];
			adc [EDI+12],EAX;
			mov EAX,[ESI+16];
			adc [EDI+16],EAX;
			mov EAX,[ESI+20];
			adc [EDI+20],EAX;
			mov EAX,[ESI+24];
			adc [EDI+24],EAX;
			mov EAX,[ESI+28];
			adc [EDI+28],EAX;

			// Turn the final CF into 0 or 1.
			sbb EAX,EAX;
			neg EAX;
			mov carry, EAX;
		}
		return carry;
	} else {
		// Portable path: ripple the carry through the words one by one.
		foreach (i; 0 .. 8) {
			const word xi = x.ptr[i];
			word sum = xi + y.ptr[i];
			const word wrapped = (sum < xi);
			sum += carry;
			carry = wrapped | (sum < carry);
			x.ptr[i] = sum;
		}
		return carry;
	}
}
212 
/*
* Eight Word Block Addition, Three Argument
*
* Writes x + y + carry (eight words each) into z and returns the carry
* out of the top word.
*
* z is deliberately NOT cleared before the addition: every word of z is
* stored by the code below, and zeroing it up front would destroy an
* operand whenever z shares storage with x or y (the portable path has
* always tolerated that).
*/
word word8_add3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
	version(D_InlineAsm_X86_64) {

		word* _z = z.ptr;
		word* _x = cast(word*)x.ptr;
		word* _y = cast(word*)y.ptr;
		word* _carry = &carry;
		asm pure nothrow @nogc {

			mov RBX,_x;
			mov RSI,_y;
			mov RDI,_z;
			mov RCX,_carry;
			xor RAX,RAX;
			sub RAX,[RCX]; // 0 - carry: CF=1 iff carry is non-zero

			// Each step reads x[i] and y[i] before storing z[i].
			mov RAX,[RBX];
			adc RAX,[RSI];
			mov [RDI],RAX;

			mov RAX,[RBX+8];
			adc RAX,[RSI+8];
			mov [RDI+8],RAX;

			mov RAX,[RBX+16];
			adc RAX,[RSI+16];
			mov [RDI+16],RAX;

			mov RAX,[RBX+24];
			adc RAX,[RSI+24];
			mov [RDI+24],RAX;

			mov RAX,[RBX+32];
			adc RAX,[RSI+32];
			mov [RDI+32],RAX;

			mov RAX,[RBX+40];
			adc RAX,[RSI+40];
			mov [RDI+40],RAX;

			mov RAX,[RBX+48];
			adc RAX,[RSI+48];
			mov [RDI+48],RAX;

			mov RAX,[RBX+56];
			adc RAX,[RSI+56];
			mov [RDI+56],RAX;

			// Turn the final CF into 0 or 1.
			sbb RAX,RAX;
			neg RAX;
			mov carry, RAX;
		}
		return carry;
	} else version (D_InlineAsm_X86) {
		word* _z = z.ptr;
		word* _x = cast(word*)x.ptr;
		word* _y = cast(word*)y.ptr;
		word* _carry = &carry;
		asm pure nothrow @nogc {

			mov EBX,_x;
			mov ESI,_y;
			mov EDI,_z;
			mov ECX,_carry;
			xor EAX,EAX;
			sub EAX,[ECX]; // 0 - carry: CF=1 iff carry is non-zero

			// Each step reads x[i] and y[i] before storing z[i].
			mov EAX,[EBX];
			adc EAX,[ESI];
			mov [EDI],EAX;

			mov EAX,[EBX+4];
			adc EAX,[ESI+4];
			mov [EDI+4],EAX;

			mov EAX,[EBX+8];
			adc EAX,[ESI+8];
			mov [EDI+8],EAX;

			mov EAX,[EBX+12];
			adc EAX,[ESI+12];
			mov [EDI+12],EAX;

			mov EAX,[EBX+16];
			adc EAX,[ESI+16];
			mov [EDI+16],EAX;

			mov EAX,[EBX+20];
			adc EAX,[ESI+20];
			mov [EDI+20],EAX;

			mov EAX,[EBX+24];
			adc EAX,[ESI+24];
			mov [EDI+24],EAX;

			mov EAX,[EBX+28];
			adc EAX,[ESI+28];
			mov [EDI+28],EAX;

			// Turn the final CF into 0 or 1.
			sbb EAX,EAX;
			neg EAX;
			mov carry, EAX;
		}
		return carry;
	}
	else {
		// Portable path: one carry-propagating word addition per limb.
		z[0] = word_add(x[0], y[0], &carry);
		z[1] = word_add(x[1], y[1], &carry);
		z[2] = word_add(x[2], y[2], &carry);
		z[3] = word_add(x[3], y[3], &carry);
		z[4] = word_add(x[4], y[4], &carry);
		z[5] = word_add(x[5], y[5], &carry);
		z[6] = word_add(x[6], y[6], &carry);
		z[7] = word_add(x[7], y[7], &carry);
		return carry;
	}
}
334 
/*
* Word Subtraction
*
* Returns x - y - *carry (truncated to one word) and replaces *carry
* with the borrow out of that subtraction.
*/
word word_sub(word x, word y, word* carry)
{
	const word diff = x - y;
	const word borrowed_xy = (diff > x);          // x - y wrapped around

	const word result = diff - *carry;
	*carry = borrowed_xy | (result > diff);       // or removing the borrow wrapped

	return result;
}
346 
/*
* Eight Word Block Subtraction, Two Argument
*
* Subtracts the eight words of y and the incoming borrow from x in place
* and returns the borrow out of the top word.
*/
word word8_sub2(ref word[8] x, const ref word[8] y, word carry)
{
	version(D_InlineAsm_X86_64) {
		// The differences are collected in a local buffer and copied
		// into x once the asm block has finished.
		word[8] scratch;
		word* minuend = x.ptr;
		word* subtrahend = cast(word*)y.ptr;
		word* dst = scratch.ptr;
		word* cptr = &carry;
		asm pure nothrow @nogc {
			mov RBX,minuend;
			mov RSI,subtrahend;
			mov RDI,dst;
			mov RCX,cptr;

			// 0 - carry borrows exactly when carry is non-zero,
			// which seeds CF for the sbb chain below.
			xor RAX,RAX;
			sub RAX,[RCX];

			mov RAX,[RBX];
			sbb RAX,[RSI];
			mov [RDI],RAX;
			mov RAX,[RBX+8];
			sbb RAX,[RSI+8];
			mov [RDI+8],RAX;
			mov RAX,[RBX+16];
			sbb RAX,[RSI+16];
			mov [RDI+16],RAX;
			mov RAX,[RBX+24];
			sbb RAX,[RSI+24];
			mov [RDI+24],RAX;
			mov RAX,[RBX+32];
			sbb RAX,[RSI+32];
			mov [RDI+32],RAX;
			mov RAX,[RBX+40];
			sbb RAX,[RSI+40];
			mov [RDI+40],RAX;
			mov RAX,[RBX+48];
			sbb RAX,[RSI+48];
			mov [RDI+48],RAX;
			mov RAX,[RBX+56];
			sbb RAX,[RSI+56];
			mov [RDI+56],RAX;

			// Turn the final CF into 0 or 1.
			sbb RAX,RAX;
			neg RAX;
			mov carry, RAX;
		}
		x[0 .. 8] = scratch[0 .. 8];
		return carry;
	}
	else version (D_InlineAsm_X86) {
		// The differences are collected in a local buffer and copied
		// into x once the asm block has finished.
		word[8] scratch;
		word* minuend = x.ptr;
		word* subtrahend = cast(word*)y.ptr;
		word* dst = scratch.ptr;
		word* cptr = &carry;
		asm pure nothrow @nogc {
			mov EBX,minuend;
			mov EDI,dst;
			mov ESI,subtrahend;
			mov ECX,cptr;

			// 0 - carry borrows exactly when carry is non-zero,
			// which seeds CF for the sbb chain below.
			xor EAX,EAX;
			sub EAX,[ECX];

			mov EAX,[EBX];
			sbb EAX,[ESI];
			mov [EDI],EAX;
			mov EAX,[EBX+4];
			sbb EAX,[ESI+4];
			mov [EDI+4],EAX;
			mov EAX,[EBX+8];
			sbb EAX,[ESI+8];
			mov [EDI+8],EAX;
			mov EAX,[EBX+12];
			sbb EAX,[ESI+12];
			mov [EDI+12],EAX;
			mov EAX,[EBX+16];
			sbb EAX,[ESI+16];
			mov [EDI+16],EAX;
			mov EAX,[EBX+20];
			sbb EAX,[ESI+20];
			mov [EDI+20],EAX;
			mov EAX,[EBX+24];
			sbb EAX,[ESI+24];
			mov [EDI+24],EAX;
			mov EAX,[EBX+28];
			sbb EAX,[ESI+28];
			mov [EDI+28],EAX;

			// Turn the final CF into 0 or 1.
			sbb EAX,EAX;
			neg EAX;
			mov carry, EAX;
		}
		x[0 .. 8] = scratch[0 .. 8];
		return carry;
	} else {
		// Portable path: one borrow-propagating word subtraction per limb.
		foreach (i; 0 .. 8)
			x[i] = word_sub(x[i], y[i], &carry);
		return carry;
	}
}
453 
/*
* Eight Word Block Subtraction, Two Argument (reversed operands)
*
* Replaces x with y - x - carry, limb by limb, and returns the borrow
* out of the top word.
*/
word word8_sub2_rev(ref word[8] x, const ref word[8] y, word carry)
{
	foreach (i; 0 .. 8)
		x[i] = word_sub(y[i], x[i], &carry);
	return carry;
}
469 
/*
* Eight Word Block Subtraction, Three Argument
*
* Writes x - y - carry (eight words each) into z and returns the borrow
* out of the top word.
*
* z is deliberately NOT cleared before the subtraction: every word of z
* is stored by the code below, and zeroing it up front would destroy an
* operand whenever z shares storage with x or y (the x86 and portable
* paths never cleared it).
*/
word word8_sub3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
	version(D_InlineAsm_X86_64) {
		word* _z = z.ptr;
		word* _x = cast(word*)x.ptr;
		word* _y = cast(word*)y.ptr;
		word* _carry = &carry;
		asm pure nothrow @nogc {
			mov RBX,_x;
			mov RSI,_y;
			mov RCX,_carry;
			xor RAX,RAX;
			sub RAX,[RCX]; // 0 - carry: CF=1 iff carry is non-zero
			mov RDI,_z;

			// Each step reads x[i] and y[i] before storing z[i].
			mov RAX,[RBX];
			sbb RAX,[RSI];
			mov [RDI],RAX;
			mov RAX,[RBX+8];
			sbb RAX,[RSI+8];
			mov [RDI+8],RAX;
			mov RAX,[RBX+16];
			sbb RAX,[RSI+16];
			mov [RDI+16],RAX;
			mov RAX,[RBX+24];
			sbb RAX,[RSI+24];
			mov [RDI+24],RAX;
			mov RAX,[RBX+32];
			sbb RAX,[RSI+32];
			mov [RDI+32],RAX;
			mov RAX,[RBX+40];
			sbb RAX,[RSI+40];
			mov [RDI+40],RAX;
			mov RAX,[RBX+48];
			sbb RAX,[RSI+48];
			mov [RDI+48],RAX;
			mov RAX,[RBX+56];
			sbb RAX,[RSI+56];
			mov [RDI+56],RAX;

			// Turn the final CF into 0 or 1.
			sbb RAX,RAX;
			neg RAX;
			mov carry, RAX;
		}
		return carry;
	} else version (D_InlineAsm_X86) {

		word* _z = z.ptr;
		word* _x = cast(word*)x.ptr;
		word* _y = cast(word*)y.ptr;
		word* _carry = &carry;
		// Same attributes as every other asm block in this module.
		asm pure nothrow @nogc {
			mov EBX,_x;
			mov ESI,_y;
			mov ECX,_carry;
			xor EAX,EAX;
			sub EAX,[ECX]; // 0 - carry: CF=1 iff carry is non-zero
			mov EDI,_z;

			// Each step reads x[i] and y[i] before storing z[i].
			mov EAX,[EBX];
			sbb EAX,[ESI];
			mov [EDI],EAX;
			mov EAX,[EBX+4];
			sbb EAX,[ESI+4];
			mov [EDI+4],EAX;
			mov EAX,[EBX+8];
			sbb EAX,[ESI+8];
			mov [EDI+8],EAX;
			mov EAX,[EBX+12];
			sbb EAX,[ESI+12];
			mov [EDI+12],EAX;
			mov EAX,[EBX+16];
			sbb EAX,[ESI+16];
			mov [EDI+16],EAX;
			mov EAX,[EBX+20];
			sbb EAX,[ESI+20];
			mov [EDI+20],EAX;
			mov EAX,[EBX+24];
			sbb EAX,[ESI+24];
			mov [EDI+24],EAX;
			mov EAX,[EBX+28];
			sbb EAX,[ESI+28];
			mov [EDI+28],EAX;

			// Turn the final CF into 0 or 1.
			sbb EAX,EAX;
			neg EAX;
			mov carry, EAX;
		}
		return carry;
	}
	else {
		// Portable path: one borrow-propagating word subtraction per limb.
		z[0] = word_sub(x[0], y[0], &carry);
		z[1] = word_sub(x[1], y[1], &carry);
		z[2] = word_sub(x[2], y[2], &carry);
		z[3] = word_sub(x[3], y[3], &carry);
		z[4] = word_sub(x[4], y[4], &carry);
		z[5] = word_sub(x[5], y[5], &carry);
		z[6] = word_sub(x[6], y[6], &carry);
		z[7] = word_sub(x[7], y[7], &carry);
		return carry;
	}
}
573 
/*
* Eight Word Block Linear Multiplication
*
* Multiplies the eight words of x by the single word y in place, feeding
* `carry` into the lowest word, and returns the high word that falls out
* of the top limb.
*/
word word8_linmul2(ref word[8] x, word y, word carry)
{
	version(D_InlineAsm_X86_64) {
		// Products are collected in a local buffer and copied into x
		// once the asm block has finished.
		word[8] scratch;
		word* src = x.ptr;
		word* dst = scratch.ptr;
		word* cptr = &carry;
		asm pure nothrow @nogc {
			mov RSI, src;
			mov RDI, dst;
			mov RDX, cptr;
			mov RCX, [RDX];    // RCX = running carry word
			mov RBX, y;        // RBX = multiplier (mul leaves it untouched)

			// Per limb: RDX:RAX = x[i] * y + RCX; store RAX, keep RDX in RCX.
			mov RAX, [RSI];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI], RAX;

			mov RAX, [RSI+8];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+8], RAX;

			mov RAX, [RSI+16];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+16], RAX;

			mov RAX, [RSI+24];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+24], RAX;

			mov RAX, [RSI+32];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+32], RAX;

			mov RAX, [RSI+40];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+40], RAX;

			mov RAX, [RSI+48];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+48], RAX;

			mov RAX, [RSI+56];
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov carry, RDX;    // final high word is the returned carry
			mov [RDI+56], RAX;
		}
		x[0 .. 8] = scratch[0 .. 8];
		return carry;
	}
	else {
		// Portable path: one multiply-add per limb, carry threaded through.
		foreach (i; 0 .. 8)
			x[i] = word_madd2(x[i], y, &carry);
		return carry;
	}
}
669 
/*
* Eight Word Block Linear Multiplication
*
* Writes x * y (eight words times one word) into z, feeding `carry` into
* the lowest word, and returns the high word that falls out of the top
* limb.
*
* z is deliberately NOT cleared before the multiplication: every word of
* z is stored by the code below, and zeroing it up front would destroy
* the operand whenever z shares storage with x (the portable path has
* always tolerated that).
*/
word word8_linmul3(ref word[8] z, const ref word[8] x, word y, word carry)
{

	version(D_InlineAsm_X86_64) {
		word* _x = cast(word*)x.ptr;
		word* _z = z.ptr;
		word* _carry = &carry;
		asm pure nothrow @nogc {
			mov RSI, _x;
			mov RDI, _z;
			mov RDX, _carry;
			mov RCX, [RDX];    // RCX = running carry word

			// Per limb: RDX:RAX = x[i] * y + RCX; store RAX in z[i],
			// keep RDX in RCX. x[i] is read before z[i] is written.
			mov RAX, [RSI];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI], RAX;

			mov RAX, [RSI+8];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+8], RAX;

			mov RAX, [RSI+16];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+16], RAX;

			mov RAX, [RSI+24];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+24], RAX;

			mov RAX, [RSI+32];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+32], RAX;

			mov RAX, [RSI+40];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+40], RAX;

			mov RAX, [RSI+48];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+48], RAX;

			mov RAX, [RSI+56];
			mov RBX, y;
			mul RBX;
			add RAX, RCX;
			adc RDX, 0;
			mov carry, RDX;    // final high word is the returned carry
			mov [RDI+56], RAX;
		}
		return carry;
	}
	else {
		// Portable path: one multiply-add per limb, carry threaded through.
		z[0] = word_madd2(x[0], y, &carry);
		z[1] = word_madd2(x[1], y, &carry);
		z[2] = word_madd2(x[2], y, &carry);
		z[3] = word_madd2(x[3], y, &carry);
		z[4] = word_madd2(x[4], y, &carry);
		z[5] = word_madd2(x[5], y, &carry);
		z[6] = word_madd2(x[6], y, &carry);
		z[7] = word_madd2(x[7], y, &carry);
		return carry;
	}
}
765 
/*
* Eight Word Block Multiply/Add
*
* Replaces z with z + x * y (eight words; y is a single word), feeding
* `carry` into the lowest word, and returns the high word that falls out
* of the top limb.
*/
word word8_madd3(ref word[8] z, const ref word[8] x, word y, word carry)
{
	version(D_InlineAsm_X86_64) {
		// Results are collected in a local buffer and copied into z
		// once the asm block has finished.
		word[8] scratch;
		word* mult = cast(word*)x.ptr;
		word* acc = z.ptr;
		word* dst = scratch.ptr;
		word* cptr = &carry;
		asm pure nothrow @nogc {
			mov R8, mult;      // R8  walks over x
			mov RSI, acc;      // RSI = z (addend)
			mov R10, y;        // R10 = multiplier
			mov RDI, dst;      // RDI = output buffer
			mov RDX, cptr;
			mov RCX, [RDX];    // RCX = running carry word

			// Per limb: RDX:RAX = x[i] * y + z[i] + RCX;
			// store RAX, keep RDX in RCX.
			mov RAX, [R8];
			mul R10;
			add RAX, [RSI];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+8];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+8], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+16];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+16], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+24];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+24], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+32];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+32], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+40];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+40], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+48];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov RCX, RDX;
			mov [RDI+48], RAX;
			add R8, 8;

			mov RAX, [R8];
			mul R10;
			add RAX, [RSI+56];
			adc RDX, 0;
			add RAX, RCX;
			adc RDX, 0;
			mov carry, RDX;    // final high word is the returned carry
			mov [RDI+56], RAX;
		}
		z[0 .. 8] = scratch[0 .. 8];
		return carry;
	} else {
		// Portable path: one multiply-add-add per limb, carry threaded through.
		foreach (i; 0 .. 8)
			z[i] = word_madd3(x[i], y, z[i], &carry);
		return carry;
	}
}
885 
/*
* Multiply-Add Accumulator
*
* Adds the double-width product a * b into the three-word accumulator
* (*w2, *w1, *w0), with *w0 the least significant word.
*/
void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
{
	version (D_InlineAsm_X86_64) {

		word* _b = &b;
		word* _a = &a;
		asm pure nothrow @nogc {
			mov R13, w0;
			mov R14, w1;
			mov R15, w2;
			mov R8, _a;
			mov R9, _b;
			mov RAX, [R8];
			mov RBX, [R9];
			mul RBX;                   // RDX:RAX = a * b

			add [R13], RAX;            // *w0 += low word
			adc [R14], RDX;            // *w1 += high word + carry
			// The immediate form carries no register to imply a width, so
			// state it: the carry must ripple through the whole 64-bit *w2.
			adc qword ptr [R15], 0;

		}
	} else {
		// Portable path: fold *w0 into the product, then ripple the high
		// word into *w1 and the resulting carry into *w2.
		word carry = *w0;
		*w0 = word_madd2(a, b, &carry);
		*w1 += carry;
		*w2 += (*w1 < carry) ? 1 : 0;
	}
}
917 
/*
* Multiply-Add Accumulator
*
* Adds twice the double-width product a * b into the three-word
* accumulator (*w2, *w1, *w0), with *w0 the least significant word.
*/
void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
{
	version(D_InlineAsm_X86_64) {
		word* _a = &a;
		word* _b = &b;

		asm pure nothrow @nogc {
			mov R13, w0;
			mov R14, w1;
			mov R15, w2;
			mov R8, _a;
			mov R9, _b;

			mov RAX, [R8];
			mov RBX, [R9];
			mul RBX;                   // RDX:RAX = a * b

			// The product is accumulated twice rather than doubled first.
			// The immediate form of adc carries no register to imply a
			// width, so it is stated: the carry must ripple through the
			// whole 64-bit *w2.
			add [R13], RAX;
			adc [R14], RDX;
			adc qword ptr [R15], 0;

			add [R13], RAX;
			adc [R14], RDX;
			adc qword ptr [R15], 0;
		}
	}
	else {
		// Portable path: compute a * b, double it into three words
		// (top:b:a), then add those into the accumulator.
		word carry = 0;
		a = word_madd2(a, b, &carry);
		b = carry;

		word top = (b >> (BOTAN_MP_WORD_BITS-1));
		b <<= 1;
		b |= (a >> (BOTAN_MP_WORD_BITS-1));
		a <<= 1;

		carry = 0;
		*w0 = word_add(*w0, a, &carry);
		*w1 = word_add(*w1, b, &carry);
		*w2 = word_add(*w2, top, &carry);
	}
}