/**
* Comba word operations
*
* Copyright:
* (C) 1999-2010,2014 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*     2006 Luca Piccarreta
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan_math.mp_word;
import botan_math.mul128;
public import botan_math.mp_types;

/*
* Word Multiply/Add
*
* Computes a * b + *c as a double-word value.
* Returns the low word; the high word is written back to *c.
*/
word word_madd2(word a, word b, word* c)
{
    static if (BOTAN_HAS_MP_DWORD) {
        const dword s = cast(dword)(a) * b + *c;
        *c = cast(word)(s >> BOTAN_MP_WORD_BITS);
        return cast(word)(s);
    } else {
        version(D_InlineAsm_X86_64) {
            // The low word is stored back into the parameter `a` through this pointer.
            word* _a = &a;
            asm pure nothrow @nogc {
                mov RAX, a;
                mov RBX, b;
                mul RBX;            // RDX:RAX = a * b
                mov RCX, c;
                add RAX, [RCX];     // low word += *c
                adc RDX, 0;         // propagate the carry into the high word
                mov [RCX], RDX;     // *c = high word
                mov RBX, _a;
                mov [RBX], RAX;     // a = low word
            }
            return a;
        }
        else {
            static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

            word[2] res;

            mul64x64_128(a, b, res);

            res[0] += *c;
            res[1] += (res[0] < *c); // carry?

            *c = res[1];
            return res[0];
        }
    }
}

/*
* Word Multiply/Add (with extra addend)
*
* Computes a * b + c + *d as a double-word value.
* Returns the low word; the high word is written back to *d.
*/
word word_madd3(word a, word b, word c, word* d)
{
    static if (BOTAN_HAS_MP_DWORD) {
        const dword s = cast(dword)(a) * b + c + *d;
        *d = cast(word)(s >> BOTAN_MP_WORD_BITS);
        return cast(word)(s);
    } else {
        version(D_InlineAsm_X86_64) {
            // The low word is stored back into the parameter `a` through this pointer.
            word* _a = &a;
            asm pure nothrow @nogc {
                mov RAX, a;
                mov RBX, b;
                mul RBX;            // RDX:RAX = a * b
                mov RBX, d;
                add RAX, c;         // low word += c
                adc RDX, 0;
                add RAX, [RBX];     // low word += *d
                adc RDX, 0;
                mov [RBX], RDX;     // *d = high word
                mov RBX, _a;
                mov [RBX], RAX;     // a = low word
            }
            return a;
        }
        else {
            static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

            word[2] res;

            mul64x64_128(a, b, res);

            res[0] += c;
            res[1] += (res[0] < c); // carry?

            res[0] += *d;
            res[1] += (res[0] < *d); // carry?

            *d = res[1];
            return res[0];
        }
    }
}


/*
* Word Addition
*
* Returns x + y + *carry (wrapping) and stores the carry out
* of that addition in *carry.
*/
word word_add(word x, word y, word* carry)
{
    word z = x + y;
    word c1 = (z < x);              // carry out of x + y
    z += *carry;
    *carry = c1 | (z < *carry);     // carry out of either addition
    return z;
}

/*
* Eight Word Block Addition, Two Argument
*
* x[i] += y[i] for i in 0 .. 8 with carry propagation.
* `carry` is the incoming carry; the outgoing carry is returned.
*/
word word8_add2(ref word[8] x, const ref word[8] y, word carry)
{
    version (D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;

        asm pure nothrow @nogc {
            mov RDI,_x;
            mov RSI,_y;
            xor RAX,RAX;
            sub RAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov RAX,[RSI];      // mov leaves the flags untouched
            adc [RDI],RAX;

            mov RAX,[RSI+8];
            adc [RDI+8],RAX;
            mov RAX,[RSI+16];
            adc [RDI+16],RAX;
            mov RAX,[RSI+24];
            adc [RDI+24],RAX;
            mov RAX,[RSI+32];
            adc [RDI+32],RAX;
            mov RAX,[RSI+40];
            adc [RDI+40],RAX;
            mov RAX,[RSI+48];
            adc [RDI+48],RAX;
            mov RAX,[RSI+56];
            adc [RDI+56],RAX;
            sbb RAX,RAX;        // RAX = CF ? all-ones : 0
            neg RAX;            // RAX = CF ? 1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {

        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {
            mov EDI,_x;
            mov ESI,_y;
            xor EAX,EAX;
            sub EAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov EAX,[ESI];
            adc [EDI],EAX;
            mov EAX,[ESI+4];
            adc [EDI+4],EAX;
            mov EAX,[ESI+8];
            adc [EDI+8],EAX;
            mov EAX,[ESI+12];
            adc [EDI+12],EAX;
            mov EAX,[ESI+16];
            adc [EDI+16],EAX;
            mov EAX,[ESI+20];
            adc [EDI+20],EAX;
            mov EAX,[ESI+24];
            adc [EDI+24],EAX;
            mov EAX,[ESI+28];
            adc [EDI+28],EAX;
            sbb EAX,EAX;        // EAX = CF ? all-ones : 0
            neg EAX;            // EAX = CF ? 1 : 0
            mov carry, EAX;
        }
        return carry;
    } else {
        // Same arithmetic as word_add, applied in place to x[i].
        void word_add_i(size_t i) {
            word z = x.ptr[i] + y.ptr[i];
            word c1 = (z < x.ptr[i]);
            z += carry;
            carry = c1 | (z < carry);
            x.ptr[i] = z;
        }
        word_add_i(0);
        word_add_i(1);
        word_add_i(2);
        word_add_i(3);
        word_add_i(4);
        word_add_i(5);
        word_add_i(6);
        word_add_i(7);
        return carry;
    }
}

/*
* Eight Word Block Addition, Three Argument
*
* z[i] = x[i] + y[i] for i in 0 .. 8 with carry propagation.
* `carry` is the incoming carry; the outgoing carry is returned.
*/
word word8_add3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {

        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {

            mov RBX,_x;
            mov RSI,_y;
            mov RDI,_z;
            xor RAX,RAX;
            sub RAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov RAX,[RBX];
            adc RAX,[RSI];
            mov [RDI],RAX;

            mov RAX,[RBX+8];
            adc RAX,[RSI+8];
            mov [RDI+8],RAX;

            mov RAX,[RBX+16];
            adc RAX,[RSI+16];
            mov [RDI+16],RAX;

            mov RAX,[RBX+24];
            adc RAX,[RSI+24];
            mov [RDI+24],RAX;

            mov RAX,[RBX+32];
            adc RAX,[RSI+32];
            mov [RDI+32],RAX;

            mov RAX,[RBX+40];
            adc RAX,[RSI+40];
            mov [RDI+40],RAX;

            mov RAX,[RBX+48];
            adc RAX,[RSI+48];
            mov [RDI+48],RAX;

            mov RAX,[RBX+56];
            adc RAX,[RSI+56];
            mov [RDI+56],RAX;

            sbb RAX,RAX;        // RAX = CF ? all-ones : 0
            neg RAX;            // RAX = CF ? 1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {
        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {

            mov EBX,_x;
            mov ESI,_y;
            mov EDI,_z;
            xor EAX,EAX;
            sub EAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov EAX,[EBX];
            adc EAX,[ESI];
            mov [EDI],EAX;

            mov EAX,[EBX+4];
            adc EAX,[ESI+4];
            mov [EDI+4],EAX;

            mov EAX,[EBX+8];
            adc EAX,[ESI+8];
            mov [EDI+8],EAX;

            mov EAX,[EBX+12];
            adc EAX,[ESI+12];
            mov [EDI+12],EAX;

            mov EAX,[EBX+16];
            adc EAX,[ESI+16];
            mov [EDI+16],EAX;

            mov EAX,[EBX+20];
            adc EAX,[ESI+20];
            mov [EDI+20],EAX;

            mov EAX,[EBX+24];
            adc EAX,[ESI+24];
            mov [EDI+24],EAX;

            mov EAX,[EBX+28];
            adc EAX,[ESI+28];
            mov [EDI+28],EAX;

            sbb EAX,EAX;        // EAX = CF ? all-ones : 0
            neg EAX;            // EAX = CF ? 1 : 0
            mov carry, EAX;
        }
        return carry;
    }
    else {
        z[0] = word_add(x[0], y[0], &carry);
        z[1] = word_add(x[1], y[1], &carry);
        z[2] = word_add(x[2], y[2], &carry);
        z[3] = word_add(x[3], y[3], &carry);
        z[4] = word_add(x[4], y[4], &carry);
        z[5] = word_add(x[5], y[5], &carry);
        z[6] = word_add(x[6], y[6], &carry);
        z[7] = word_add(x[7], y[7], &carry);
        return carry;
    }
}

/*
* Word Subtraction
*
* Returns x - y - *carry (wrapping) and stores the borrow out
* of that subtraction in *carry.
*/
word word_sub(word x, word y, word* carry)
{
    word t0 = x - y;
    word c1 = (t0 > x);             // borrow out of x - y
    word z = t0 - *carry;
    *carry = c1 | (z > t0);         // borrow out of either subtraction
    return z;
}

/*
* Eight Word Block Subtraction, Two Argument
*
* x[i] -= y[i] for i in 0 .. 8 with borrow propagation.
* `carry` is the incoming borrow; the outgoing borrow is returned.
*/
word word8_sub2(ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        word[8] ret;                // differences are staged here, then copied to x
        word* _z = ret.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {
            mov RBX,_x;
            mov RSI,_y;
            mov RDI, _z;
            xor RAX,RAX;
            sub RAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov RAX,[RBX];
            sbb RAX,[RSI];
            mov [RDI],RAX;
            mov RAX,[RBX+8];
            sbb RAX,[RSI+8];
            mov [RDI+8],RAX;
            mov RAX,[RBX+16];
            sbb RAX,[RSI+16];
            mov [RDI+16],RAX;
            mov RAX,[RBX+24];
            sbb RAX,[RSI+24];
            mov [RDI+24],RAX;
            mov RAX,[RBX+32];
            sbb RAX,[RSI+32];
            mov [RDI+32],RAX;
            mov RAX,[RBX+40];
            sbb RAX,[RSI+40];
            mov [RDI+40],RAX;
            mov RAX,[RBX+48];
            sbb RAX,[RSI+48];
            mov [RDI+48],RAX;
            mov RAX,[RBX+56];
            sbb RAX,[RSI+56];
            mov [RDI+56],RAX;
            sbb RAX,RAX;        // RAX = CF ? all-ones : 0
            neg RAX;            // RAX = CF ? 1 : 0
            mov carry, RAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;

    }
    else version (D_InlineAsm_X86) {
        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;
        word[8] ret;                // differences are staged here, then copied to x
        word* _z = ret.ptr;
        asm pure nothrow @nogc {
            mov EBX,_x;
            mov EDI,_z;
            mov ESI,_y;
            xor EAX,EAX;
            sub EAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov EAX,[EBX];
            sbb EAX,[ESI];
            mov [EDI],EAX;
            mov EAX,[EBX+4];
            sbb EAX,[ESI+4];
            mov [EDI+4],EAX;
            mov EAX,[EBX+8];
            sbb EAX,[ESI+8];
            mov [EDI+8],EAX;
            mov EAX,[EBX+12];
            sbb EAX,[ESI+12];
            mov [EDI+12],EAX;
            mov EAX,[EBX+16];
            sbb EAX,[ESI+16];
            mov [EDI+16],EAX;
            mov EAX,[EBX+20];
            sbb EAX,[ESI+20];
            mov [EDI+20],EAX;
            mov EAX,[EBX+24];
            sbb EAX,[ESI+24];
            mov [EDI+24],EAX;
            mov EAX,[EBX+28];
            sbb EAX,[ESI+28];
            mov [EDI+28],EAX;
            sbb EAX,EAX;        // EAX = CF ? all-ones : 0
            neg EAX;            // EAX = CF ? 1 : 0
            mov carry, EAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;

    } else {
        x[0] = word_sub(x[0], y[0], &carry);
        x[1] = word_sub(x[1], y[1], &carry);
        x[2] = word_sub(x[2], y[2], &carry);
        x[3] = word_sub(x[3], y[3], &carry);
        x[4] = word_sub(x[4], y[4], &carry);
        x[5] = word_sub(x[5], y[5], &carry);
        x[6] = word_sub(x[6], y[6], &carry);
        x[7] = word_sub(x[7], y[7], &carry);
        return carry;
    }
}

/*
* Eight Word Block Subtraction, Two Argument, Reversed Operands
*
* x[i] = y[i] - x[i] for i in 0 .. 8 with borrow propagation.
* `carry` is the incoming borrow; the outgoing borrow is returned.
*/
word word8_sub2_rev(ref word[8] x, const ref word[8] y, word carry)
{
    x[0] = word_sub(y[0], x[0], &carry);
    x[1] = word_sub(y[1], x[1], &carry);
    x[2] = word_sub(y[2], x[2], &carry);
    x[3] = word_sub(y[3], x[3], &carry);
    x[4] = word_sub(y[4], x[4], &carry);
    x[5] = word_sub(y[5], x[5], &carry);
    x[6] = word_sub(y[6], x[6], &carry);
    x[7] = word_sub(y[7], x[7], &carry);
    return carry;
}

/*
* Eight Word Block Subtraction, Three Argument
*
* z[i] = x[i] - y[i] for i in 0 .. 8 with borrow propagation.
* `carry` is the incoming borrow; the outgoing borrow is returned.
*/
word word8_sub3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {
        // z is deliberately NOT cleared first: every one of its eight words is
        // overwritten below, and clearing it up front would destroy the inputs
        // whenever z refers to the same storage as x or y.
        word* _z = z.ptr;

        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {
            mov RBX,_x;
            mov RSI,_y;
            xor RAX,RAX;
            sub RAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov RDI,_z;
            mov RAX,[RBX];
            sbb RAX,[RSI];
            mov [RDI],RAX;
            mov RAX,[RBX+8];
            sbb RAX,[RSI+8];
            mov [RDI+8],RAX;
            mov RAX,[RBX+16];
            sbb RAX,[RSI+16];
            mov [RDI+16],RAX;
            mov RAX,[RBX+24];
            sbb RAX,[RSI+24];
            mov [RDI+24],RAX;
            mov RAX,[RBX+32];
            sbb RAX,[RSI+32];
            mov [RDI+32],RAX;
            mov RAX,[RBX+40];
            sbb RAX,[RSI+40];
            mov [RDI+40],RAX;
            mov RAX,[RBX+48];
            sbb RAX,[RSI+48];
            mov [RDI+48],RAX;
            mov RAX,[RBX+56];
            sbb RAX,[RSI+56];
            mov [RDI+56],RAX;
            sbb RAX,RAX;        // RAX = CF ? all-ones : 0
            neg RAX;            // RAX = CF ? 1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {

        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        asm pure nothrow @nogc {
            mov EBX,_x;
            mov ESI,_y;
            xor EAX,EAX;
            sub EAX,carry;      // 0 - carry: sets CF iff carry != 0
            mov EDI,_z;
            mov EAX,[EBX];
            sbb EAX,[ESI];
            mov [EDI],EAX;
            mov EAX,[EBX+4];
            sbb EAX,[ESI+4];
            mov [EDI+4],EAX;
            mov EAX,[EBX+8];
            sbb EAX,[ESI+8];
            mov [EDI+8],EAX;
            mov EAX,[EBX+12];
            sbb EAX,[ESI+12];
            mov [EDI+12],EAX;
            mov EAX,[EBX+16];
            sbb EAX,[ESI+16];
            mov [EDI+16],EAX;
            mov EAX,[EBX+20];
            sbb EAX,[ESI+20];
            mov [EDI+20],EAX;
            mov EAX,[EBX+24];
            sbb EAX,[ESI+24];
            mov [EDI+24],EAX;
            mov EAX,[EBX+28];
            sbb EAX,[ESI+28];
            mov [EDI+28],EAX;
            sbb EAX,EAX;        // EAX = CF ? all-ones : 0
            neg EAX;            // EAX = CF ? 1 : 0
            mov carry, EAX;
        }
        return carry;
    }
    else {
        z[0] = word_sub(x[0], y[0], &carry);
        z[1] = word_sub(x[1], y[1], &carry);
        z[2] = word_sub(x[2], y[2], &carry);
        z[3] = word_sub(x[3], y[3], &carry);
        z[4] = word_sub(x[4], y[4], &carry);
        z[5] = word_sub(x[5], y[5], &carry);
        z[6] = word_sub(x[6], y[6], &carry);
        z[7] = word_sub(x[7], y[7], &carry);
        return carry;
    }
}

/*
* Eight Word Block Linear Multiplication, In Place
*
* x[i] = low word of (x[i] * y + carry) for i in 0 .. 8, where the high
* word of each step becomes the carry of the next.
* `carry` is the incoming carry; the final high word is returned.
*/
word word8_linmul2(ref word[8] x, word y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        word[8] ret;                // products are staged here, then copied to x
        word* _z = ret.ptr;
        asm pure nothrow @nogc {
            mov RSI, _x;
            mov RDI, _z;
            mov RCX, carry;         // RCX holds the running carry

            mov RAX, [RSI];
            mov RBX, y;
            mul RBX;                // RDX:RAX = x[0] * y
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;

            mov RAX, [RSI+8];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;

            mov RAX, [RSI+16];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;

            mov RAX, [RSI+24];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;

            mov RAX, [RSI+32];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;

            mov RAX, [RSI+40];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;

            mov RAX, [RSI+48];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+48], RAX;

            mov RAX, [RSI+56];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;         // final high word is the returned carry
            mov [RDI+56], RAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;
    }
    else {
        x[0] = word_madd2(x[0], y, &carry);
        x[1] = word_madd2(x[1], y, &carry);
        x[2] = word_madd2(x[2], y, &carry);
        x[3] = word_madd2(x[3], y, &carry);
        x[4] = word_madd2(x[4], y, &carry);
        x[5] = word_madd2(x[5], y, &carry);
        x[6] = word_madd2(x[6], y, &carry);
        x[7] = word_madd2(x[7], y, &carry);
        return carry;
    }
}

/*
* Eight Word Block Linear Multiplication, Three Argument
*
* z[i] = low word of (x[i] * y + carry) for i in 0 .. 8, where the high
* word of each step becomes the carry of the next.
* `carry` is the incoming carry; the final high word is returned.
*/
word word8_linmul3(ref word[8] z, const ref word[8] x, word y, word carry)
{

    version(D_InlineAsm_X86_64) {
        word* _x = cast(word*)x.ptr;
        // z is deliberately NOT cleared first: every one of its eight words is
        // overwritten below, and clearing it up front would destroy the input
        // whenever z refers to the same storage as x.
        word* _z = z.ptr;
        asm pure nothrow @nogc {
            mov RSI, _x;
            mov RDI, _z;
            mov RCX, carry;         // RCX holds the running carry

            mov RAX, [RSI];
            mov RBX, y;
            mul RBX;                // RDX:RAX = x[0] * y
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;

            mov RAX, [RSI+8];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;

            mov RAX, [RSI+16];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;

            mov RAX, [RSI+24];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;

            mov RAX, [RSI+32];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;

            mov RAX, [RSI+40];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;

            mov RAX, [RSI+48];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+48], RAX;

            mov RAX, [RSI+56];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;         // final high word is the returned carry
            mov [RDI+56], RAX;
        }
        return carry;
    }
    else {
        z[0] = word_madd2(x[0], y, &carry);
        z[1] = word_madd2(x[1], y, &carry);
        z[2] = word_madd2(x[2], y, &carry);
        z[3] = word_madd2(x[3], y, &carry);
        z[4] = word_madd2(x[4], y, &carry);
        z[5] = word_madd2(x[5], y, &carry);
        z[6] = word_madd2(x[6], y, &carry);
        z[7] = word_madd2(x[7], y, &carry);
        return carry;
    }
}

/*
* Eight Word Block Multiply/Add
*
* z[i] = low word of (x[i] * y + z[i] + carry) for i in 0 .. 8, where the
* high word of each step becomes the carry of the next.
* `carry` is the incoming carry; the final high word is returned.
*/
word word8_madd3(ref word[8] z, const ref word[8] x, word y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = cast(word*)x.ptr;
        word* _z = z.ptr;
        word[8] ret;                // results are staged here, then copied to z
        word* _z1 = ret.ptr;
        asm pure nothrow @nogc {
            mov R8, _x;             // R8 walks over x, advanced by 8 bytes per step
            mov RSI, _z;
            mov R10, y;
            mov RDI, _z1;
            mov RCX, carry;         // RCX holds the running carry

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;                // RDX:RAX = x[0] * y
            add RAX, [RSI];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+8];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+16];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+24];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+32];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+40];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+48];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            // Must use the full 64-bit RDI here: addressing through EDI would
            // truncate the destination pointer to 32 bits.
            mov [RDI+48], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+56];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;         // final high word is the returned carry
            mov [RDI+56], RAX;
        }
        z[0 .. 8] = ret[0..8];
        return carry;
    } else {
        z[0] = word_madd3(x[0], y, z[0], &carry);
        z[1] = word_madd3(x[1], y, z[1], &carry);
        z[2] = word_madd3(x[2], y, z[2], &carry);
        z[3] = word_madd3(x[3], y, z[3], &carry);
        z[4] = word_madd3(x[4], y, z[4], &carry);
        z[5] = word_madd3(x[5], y, z[5], &carry);
        z[6] = word_madd3(x[6], y, z[6], &carry);
        z[7] = word_madd3(x[7], y, z[7], &carry);
        return carry;
    }
}

/*
* Multiply-Add Accumulator
*
* Adds the double-word product a * b into the three-word accumulator
* (*w2, *w1, *w0), where *w0 is the least significant word.
*/
void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
{
    version (D_InlineAsm_X86_64) {

        asm pure nothrow @nogc {
            mov R13, w0;
            mov R14, w1;
            mov R15, w2;
            mov RAX, a;
            mov RBX, b;
            mul RBX;                // RDX:RAX = a * b

            add [R13], RAX;         // *w0 += low word
            adc [R14], RDX;         // *w1 += high word + carry
            adc [R15], 0;           // *w2 += carry

        }
    } else {
        word carry = *w0;
        *w0 = word_madd2(a, b, &carry);     // low word of a * b + *w0; carry = high word
        *w1 += carry;
        *w2 += (*w1 < carry) ? 1 : 0;       // carry out of the *w1 addition
    }
}

/*
* Multiply-Add Accumulator, Doubled Product
*
* Adds 2 * a * b into the three-word accumulator (*w2, *w1, *w0),
* where *w0 is the least significant word.
*/
void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
{
    version(D_InlineAsm_X86_64) {

        asm pure nothrow @nogc {
            mov R13, w0;
            mov R14, w1;
            mov R15, w2;

            mov RAX, a;
            mov RBX, b;
            mul RBX;                // RDX:RAX = a * b

            // Accumulate the product twice instead of doubling it first.
            add [R13], RAX;
            adc [R14], RDX;
            adc [R15], 0;

            add [R13], RAX;
            adc [R14], RDX;
            adc [R15], 0;
        }
    }
    else {
        word carry = 0;
        a = word_madd2(a, b, &carry);       // a = low word of the product
        b = carry;                          // b = high word of the product

        // Double the two-word product (b:a) into the three words (top:b:a).
        word top = (b >> (BOTAN_MP_WORD_BITS-1));
        b <<= 1;
        b |= (a >> (BOTAN_MP_WORD_BITS-1));
        a <<= 1;

        carry = 0;
        *w0 = word_add(*w0, a, &carry);
        *w1 = word_add(*w1, b, &carry);
        *w2 = word_add(*w2, top, &carry);
    }
}