/**
* Comba word operations
* 
* Copyright:
* (C) 1999-2010,2014 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*      2006 Luca Piccarreta
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan_math.mp_word;
import botan_math.mul128;
public import botan_math.mp_types;

/*
 * Word Multiply/Add, two operand
 *
 * Computes a * b + *c as a double-width value. The low word is returned
 * and the high word is stored back through c.
 */
word word_madd2(word a, word b, word* c)
{
    static if (BOTAN_HAS_MP_DWORD) {
        const dword s = cast(dword)(a) * b + *c;
        *c = cast(word)(s >> BOTAN_MP_WORD_BITS);
        return cast(word)(s);
    } else {
        version(D_InlineAsm_X86_64) {
            // The by-value parameters are accessed through pointers so the
            // low word can be written back into `a` and returned from there.
            word* _a = &a;
            word* _b = &b;
            asm pure nothrow @nogc {
                mov R8, _a;
                mov R9, _b;
                mov RCX, c;

                mov RAX, [R8];
                mov RBX, [R9];
                mul RBX;            // RDX:RAX = a * b
                add RAX, [RCX];     // + *c
                adc RDX, 0;
                mov [RCX], RDX;     // *c = high word
                mov [R8], RAX;      // a  = low word
            }
            return a;
        }
        else {
            static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

            word[2] res;

            mul64x64_128(a, b, res);

            res[0] += *c;
            res[1] += (res[0] < *c);        // carry?

            *c = res[1];
            return res[0];
        }
    }
}

/*
 * Word Multiply/Add, three operand
 *
 * Computes a * b + c + *d as a double-width value. The low word is
 * returned and the high word is stored back through d.
 */
word word_madd3(word a, word b, word c, word* d)
{
    static if (BOTAN_HAS_MP_DWORD) {
        const dword s = cast(dword)(a) * b + c + *d;
        *d = cast(word)(s >> BOTAN_MP_WORD_BITS);
        return cast(word)(s);
    } else {
        version(D_InlineAsm_X86_64) {
            word* _a = &a;
            word* _b = &b;
            word* _c = &c;
            asm pure nothrow @nogc {
                mov R8, _a;
                mov R9, _b;
                mov R10, _c;

                mov RAX, [R8];
                mov RBX, [R9];
                mul RBX;            // RDX:RAX = a * b
                mov RBX, d;
                add RAX, [R10];     // + c
                adc RDX, 0;
                add RAX, [RBX];     // + *d
                adc RDX, 0;
                mov [RBX], RDX;     // *d = high word
                mov [R8], RAX;      // a  = low word
            }
            return a;
        }
        else {
            static assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");

            word[2] res;

            mul64x64_128(a, b, res);

            res[0] += c;
            res[1] += (res[0] < c);        // carry?

            res[0] += *d;
            res[1] += (res[0] < *d);        // carry?

            *d = res[1];
            return res[0];
        }
    }
}


/*
 * Word Addition
 *
 * Returns x + y + *carry (truncated to one word) and stores the
 * carry-out of that addition back through carry.
 */
word word_add(word x, word y, word* carry)
{
    word z = x + y;
    word c1 = (z < x);              // overflow of x + y
    z += *carry;
    *carry = c1 | (z < *carry);     // overflow of either addition
    return z;
}

/*
 * Eight Word Block Addition, Two Argument
 *
 * Adds y to x in place, word by word from index 0 upward, propagating
 * the carry. Returns the carry out of the last word.
 * NOTE(review): the assembly paths treat any non-zero incoming carry as 1;
 * callers are presumably expected to pass 0 or 1.
 */
word word8_add2(ref word[8] x, const ref word[8] y, word carry)
{
    version (D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;

        asm pure nothrow @nogc {
            mov RDI,_x;
            mov RSI,_y;
            mov RCX,_carry;
            xor RAX,RAX;
            sub RAX,[RCX]; //force CF=1 iff *carry==1
            mov RAX,[RSI];
            adc [RDI],RAX;

            mov RAX,[RSI+8];
            adc [RDI+8],RAX;
            mov RAX,[RSI+16];
            adc [RDI+16],RAX;
            mov RAX,[RSI+24];
            adc [RDI+24],RAX;
            mov RAX,[RSI+32];
            adc [RDI+32],RAX;
            mov RAX,[RSI+40];
            adc [RDI+40],RAX;
            mov RAX,[RSI+48];
            adc [RDI+48],RAX;
            mov RAX,[RSI+56];
            adc [RDI+56],RAX;
            sbb RAX,RAX;    // RAX = CF ? -1 : 0
            neg RAX;        // RAX = CF ?  1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {

        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov EDI,_x;
            mov ESI,_y;
            mov ECX,_carry;
            xor EAX,EAX;
            sub EAX,[ECX]; //force CF=1 iff *carry==1
            mov EAX,[ESI];
            adc [EDI],EAX;
            mov EAX,[ESI+4];
            adc [EDI+4],EAX;
            mov EAX,[ESI+8];
            adc [EDI+8],EAX;
            mov EAX,[ESI+12];
            adc [EDI+12],EAX;
            mov EAX,[ESI+16];
            adc [EDI+16],EAX;
            mov EAX,[ESI+20];
            adc [EDI+20],EAX;
            mov EAX,[ESI+24];
            adc [EDI+24],EAX;
            mov EAX,[ESI+28];
            adc [EDI+28],EAX;
            sbb EAX,EAX;    // EAX = CF ? -1 : 0
            neg EAX;        // EAX = CF ?  1 : 0
            mov carry, EAX;
        }
        return carry;
    } else {
        // Portable fallback: same arithmetic as word_add, applied in place.
        x[0] = word_add(x[0], y[0], &carry);
        x[1] = word_add(x[1], y[1], &carry);
        x[2] = word_add(x[2], y[2], &carry);
        x[3] = word_add(x[3], y[3], &carry);
        x[4] = word_add(x[4], y[4], &carry);
        x[5] = word_add(x[5], y[5], &carry);
        x[6] = word_add(x[6], y[6], &carry);
        x[7] = word_add(x[7], y[7], &carry);
        return carry;
    }
}

/*
 * Eight Word Block Addition, Three Argument
 *
 * Writes x + y into z, word by word from index 0 upward, propagating the
 * carry. Returns the carry out of the last word.
 *
 * Every word of z is overwritten, and each z[i] is stored only after x[i]
 * and y[i] have been read, so z may be the same array as x or y. For that
 * reason z is not cleared beforehand.
 */
word word8_add3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {

        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {

            mov RBX,_x;
            mov RSI,_y;
            mov RDI,_z;
            mov RCX,_carry;
            xor RAX,RAX;
            sub RAX,[RCX]; //force CF=1 iff *carry==1
            mov RAX,[RBX];
            adc RAX,[RSI];
            mov [RDI],RAX;

            mov RAX,[RBX+8];
            adc RAX,[RSI+8];
            mov [RDI+8],RAX;

            mov RAX,[RBX+16];
            adc RAX,[RSI+16];
            mov [RDI+16],RAX;

            mov RAX,[RBX+24];
            adc RAX,[RSI+24];
            mov [RDI+24],RAX;

            mov RAX,[RBX+32];
            adc RAX,[RSI+32];
            mov [RDI+32],RAX;

            mov RAX,[RBX+40];
            adc RAX,[RSI+40];
            mov [RDI+40],RAX;

            mov RAX,[RBX+48];
            adc RAX,[RSI+48];
            mov [RDI+48],RAX;

            mov RAX,[RBX+56];
            adc RAX,[RSI+56];
            mov [RDI+56],RAX;

            sbb RAX,RAX;    // RAX = CF ? -1 : 0
            neg RAX;        // RAX = CF ?  1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {
        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {

            mov EBX,_x;
            mov ESI,_y;
            mov EDI,_z;
            mov ECX,_carry;
            xor EAX,EAX;
            sub EAX,[ECX]; //force CF=1 iff *carry==1
            mov EAX,[EBX];
            adc EAX,[ESI];
            mov [EDI],EAX;

            mov EAX,[EBX+4];
            adc EAX,[ESI+4];
            mov [EDI+4],EAX;

            mov EAX,[EBX+8];
            adc EAX,[ESI+8];
            mov [EDI+8],EAX;

            mov EAX,[EBX+12];
            adc EAX,[ESI+12];
            mov [EDI+12],EAX;

            mov EAX,[EBX+16];
            adc EAX,[ESI+16];
            mov [EDI+16],EAX;

            mov EAX,[EBX+20];
            adc EAX,[ESI+20];
            mov [EDI+20],EAX;

            mov EAX,[EBX+24];
            adc EAX,[ESI+24];
            mov [EDI+24],EAX;

            mov EAX,[EBX+28];
            adc EAX,[ESI+28];
            mov [EDI+28],EAX;

            sbb EAX,EAX;    // EAX = CF ? -1 : 0
            neg EAX;        // EAX = CF ?  1 : 0
            mov carry, EAX;
        }
        return carry;
    }
    else {
        z[0] = word_add(x[0], y[0], &carry);
        z[1] = word_add(x[1], y[1], &carry);
        z[2] = word_add(x[2], y[2], &carry);
        z[3] = word_add(x[3], y[3], &carry);
        z[4] = word_add(x[4], y[4], &carry);
        z[5] = word_add(x[5], y[5], &carry);
        z[6] = word_add(x[6], y[6], &carry);
        z[7] = word_add(x[7], y[7], &carry);
        return carry;
    }
}

/*
 * Word Subtraction
 *
 * Returns x - y - *carry (truncated to one word) and stores the
 * borrow-out of that subtraction back through carry.
 */
word word_sub(word x, word y, word* carry)
{
    word t0 = x - y;
    word c1 = (t0 > x);             // borrow from x - y
    word z = t0 - *carry;
    *carry = c1 | (z > t0);         // borrow from either subtraction
    return z;
}

/*
 * Eight Word Block Subtraction, Two Argument
 *
 * Replaces x with x - y, word by word from index 0 upward, propagating
 * the borrow. Returns the borrow out of the last word.
 */
word word8_sub2(ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        // The difference is built in a local buffer and copied into x
        // after the asm statement.
        word[8] ret;
        word* _z = ret.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov RBX,_x;
            mov RSI,_y;
            mov RDI, _z;
            mov RCX,_carry;
            xor RAX,RAX;
            sub RAX,[RCX]; //force CF=1 iff *carry==1
            mov RAX,[RBX];
            sbb RAX,[RSI];
            mov [RDI],RAX;
            mov RAX,[RBX+8];
            sbb RAX,[RSI+8];
            mov [RDI+8],RAX;
            mov RAX,[RBX+16];
            sbb RAX,[RSI+16];
            mov [RDI+16],RAX;
            mov RAX,[RBX+24];
            sbb RAX,[RSI+24];
            mov [RDI+24],RAX;
            mov RAX,[RBX+32];
            sbb RAX,[RSI+32];
            mov [RDI+32],RAX;
            mov RAX,[RBX+40];
            sbb RAX,[RSI+40];
            mov [RDI+40],RAX;
            mov RAX,[RBX+48];
            sbb RAX,[RSI+48];
            mov [RDI+48],RAX;
            mov RAX,[RBX+56];
            sbb RAX,[RSI+56];
            mov [RDI+56],RAX;
            sbb RAX,RAX;    // RAX = CF ? -1 : 0
            neg RAX;        // RAX = CF ?  1 : 0
            mov carry, RAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;

    }
    else version (D_InlineAsm_X86) {
        word* _x = x.ptr;
        word* _y = cast(word*)y.ptr;
        word[8] ret;
        word* _z = ret.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov EBX,_x;
            mov EDI,_z;
            mov ESI,_y;
            mov ECX,_carry;
            xor EAX,EAX;
            sub EAX,[ECX]; //force CF=1 iff *carry==1
            mov EAX,[EBX];
            sbb EAX,[ESI];
            mov [EDI],EAX;
            mov EAX,[EBX+4];
            sbb EAX,[ESI+4];
            mov [EDI+4],EAX;
            mov EAX,[EBX+8];
            sbb EAX,[ESI+8];
            mov [EDI+8],EAX;
            mov EAX,[EBX+12];
            sbb EAX,[ESI+12];
            mov [EDI+12],EAX;
            mov EAX,[EBX+16];
            sbb EAX,[ESI+16];
            mov [EDI+16],EAX;
            mov EAX,[EBX+20];
            sbb EAX,[ESI+20];
            mov [EDI+20],EAX;
            mov EAX,[EBX+24];
            sbb EAX,[ESI+24];
            mov [EDI+24],EAX;
            mov EAX,[EBX+28];
            sbb EAX,[ESI+28];
            mov [EDI+28],EAX;
            sbb EAX,EAX;    // EAX = CF ? -1 : 0
            neg EAX;        // EAX = CF ?  1 : 0
            mov carry, EAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;

    } else {
        x[0] = word_sub(x[0], y[0], &carry);
        x[1] = word_sub(x[1], y[1], &carry);
        x[2] = word_sub(x[2], y[2], &carry);
        x[3] = word_sub(x[3], y[3], &carry);
        x[4] = word_sub(x[4], y[4], &carry);
        x[5] = word_sub(x[5], y[5], &carry);
        x[6] = word_sub(x[6], y[6], &carry);
        x[7] = word_sub(x[7], y[7], &carry);
        return carry;
    }
}

/*
 * Eight Word Block Subtraction, Two Argument, Reversed Operands
 *
 * Replaces x with y - x (note the operand order), word by word from
 * index 0 upward, propagating the borrow. Returns the borrow out of the
 * last word.
 */
word word8_sub2_rev(ref word[8] x, const ref word[8] y, word carry)
{
    x[0] = word_sub(y[0], x[0], &carry);
    x[1] = word_sub(y[1], x[1], &carry);
    x[2] = word_sub(y[2], x[2], &carry);
    x[3] = word_sub(y[3], x[3], &carry);
    x[4] = word_sub(y[4], x[4], &carry);
    x[5] = word_sub(y[5], x[5], &carry);
    x[6] = word_sub(y[6], x[6], &carry);
    x[7] = word_sub(y[7], x[7], &carry);
    return carry;
}

/*
 * Eight Word Block Subtraction, Three Argument
 *
 * Writes x - y into z, word by word from index 0 upward, propagating the
 * borrow. Returns the borrow out of the last word.
 *
 * Every word of z is overwritten, and each z[i] is stored only after x[i]
 * and y[i] have been read, so z may be the same array as x or y. For that
 * reason z is not cleared beforehand.
 */
word word8_sub3(ref word[8] z, const ref word[8] x, const ref word[8] y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _z = z.ptr;

        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov RBX,_x;
            mov RSI,_y;
            mov RCX,_carry;
            xor RAX,RAX;
            sub RAX,[RCX]; //force CF=1 iff *carry==1
            mov RDI,_z;
            mov RAX,[RBX];
            sbb RAX,[RSI];
            mov [RDI],RAX;
            mov RAX,[RBX+8];
            sbb RAX,[RSI+8];
            mov [RDI+8],RAX;
            mov RAX,[RBX+16];
            sbb RAX,[RSI+16];
            mov [RDI+16],RAX;
            mov RAX,[RBX+24];
            sbb RAX,[RSI+24];
            mov [RDI+24],RAX;
            mov RAX,[RBX+32];
            sbb RAX,[RSI+32];
            mov [RDI+32],RAX;
            mov RAX,[RBX+40];
            sbb RAX,[RSI+40];
            mov [RDI+40],RAX;
            mov RAX,[RBX+48];
            sbb RAX,[RSI+48];
            mov [RDI+48],RAX;
            mov RAX,[RBX+56];
            sbb RAX,[RSI+56];
            mov [RDI+56],RAX;
            sbb RAX,RAX;    // RAX = CF ? -1 : 0
            neg RAX;        // RAX = CF ?  1 : 0
            mov carry, RAX;
        }
        return carry;
    } else version (D_InlineAsm_X86) {

        word* _z = z.ptr;
        word* _x = cast(word*)x.ptr;
        word* _y = cast(word*)y.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov EBX,_x;
            mov ESI,_y;
            mov ECX,_carry;
            xor EAX,EAX;
            sub EAX,[ECX]; //force CF=1 iff *carry==1
            mov EDI,_z;
            mov EAX,[EBX];
            sbb EAX,[ESI];
            mov [EDI],EAX;
            mov EAX,[EBX+4];
            sbb EAX,[ESI+4];
            mov [EDI+4],EAX;
            mov EAX,[EBX+8];
            sbb EAX,[ESI+8];
            mov [EDI+8],EAX;
            mov EAX,[EBX+12];
            sbb EAX,[ESI+12];
            mov [EDI+12],EAX;
            mov EAX,[EBX+16];
            sbb EAX,[ESI+16];
            mov [EDI+16],EAX;
            mov EAX,[EBX+20];
            sbb EAX,[ESI+20];
            mov [EDI+20],EAX;
            mov EAX,[EBX+24];
            sbb EAX,[ESI+24];
            mov [EDI+24],EAX;
            mov EAX,[EBX+28];
            sbb EAX,[ESI+28];
            mov [EDI+28],EAX;
            sbb EAX,EAX;    // EAX = CF ? -1 : 0
            neg EAX;        // EAX = CF ?  1 : 0
            mov carry, EAX;
        }
        return carry;
    }
    else {
        z[0] = word_sub(x[0], y[0], &carry);
        z[1] = word_sub(x[1], y[1], &carry);
        z[2] = word_sub(x[2], y[2], &carry);
        z[3] = word_sub(x[3], y[3], &carry);
        z[4] = word_sub(x[4], y[4], &carry);
        z[5] = word_sub(x[5], y[5], &carry);
        z[6] = word_sub(x[6], y[6], &carry);
        z[7] = word_sub(x[7], y[7], &carry);
        return carry;
    }
}

/*
 * Eight Word Block Linear Multiplication, in place
 *
 * Replaces each x[i] with the low word of x[i] * y + carry, where carry
 * is the high word produced by the previous index (or the incoming carry
 * for index 0). Returns the high word produced by the last index.
 */
word word8_linmul2(ref word[8] x, word y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = x.ptr;
        // Products are built in a local buffer and copied into x after
        // the asm statement.
        word[8] ret;
        word* _z = ret.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov RSI, _x;
            mov RDI, _z;
            mov RDX, _carry;
            mov RCX, [RDX];         // RCX holds the running carry

            mov RAX, [RSI];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;

            mov RAX, [RSI+8];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;

            mov RAX, [RSI+16];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;

            mov RAX, [RSI+24];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;

            mov RAX, [RSI+32];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;

            mov RAX, [RSI+40];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;

            mov RAX, [RSI+48];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+48], RAX;

            mov RAX, [RSI+56];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;
            mov [RDI+56], RAX;
        }
        x[0 .. 8] = ret[0 .. 8];
        return carry;
    }
    else {
        x[0] = word_madd2(x[0], y, &carry);
        x[1] = word_madd2(x[1], y, &carry);
        x[2] = word_madd2(x[2], y, &carry);
        x[3] = word_madd2(x[3], y, &carry);
        x[4] = word_madd2(x[4], y, &carry);
        x[5] = word_madd2(x[5], y, &carry);
        x[6] = word_madd2(x[6], y, &carry);
        x[7] = word_madd2(x[7], y, &carry);
        return carry;
    }
}

/*
 * Eight Word Block Linear Multiplication, three argument
 *
 * Sets each z[i] to the low word of x[i] * y + carry, where carry is the
 * high word produced by the previous index (or the incoming carry for
 * index 0). Returns the high word produced by the last index.
 *
 * Every word of z is overwritten, and each z[i] is stored only after x[i]
 * has been read, so z may be the same array as x. For that reason z is
 * not cleared beforehand.
 */
word word8_linmul3(ref word[8] z, const ref word[8] x, word y, word carry)
{

    version(D_InlineAsm_X86_64) {
        word* _x = cast(word*)x.ptr;
        word* _z = z.ptr;
        word* _carry = &carry;
        asm pure nothrow @nogc {
            mov RSI, _x;
            mov RDI, _z;
            mov RDX, _carry;
            mov RCX, [RDX];         // RCX holds the running carry

            mov RAX, [RSI];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;

            mov RAX, [RSI+8];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;

            mov RAX, [RSI+16];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;

            mov RAX, [RSI+24];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;

            mov RAX, [RSI+32];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;

            mov RAX, [RSI+40];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;

            mov RAX, [RSI+48];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+48], RAX;

            mov RAX, [RSI+56];
            mov RBX, y;
            mul RBX;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;
            mov [RDI+56], RAX;
        }
        return carry;
    }
    else {
        z[0] = word_madd2(x[0], y, &carry);
        z[1] = word_madd2(x[1], y, &carry);
        z[2] = word_madd2(x[2], y, &carry);
        z[3] = word_madd2(x[3], y, &carry);
        z[4] = word_madd2(x[4], y, &carry);
        z[5] = word_madd2(x[5], y, &carry);
        z[6] = word_madd2(x[6], y, &carry);
        z[7] = word_madd2(x[7], y, &carry);
        return carry;
    }
}

/*
 * Eight Word Block Multiply/Add
 *
 * Replaces each z[i] with the low word of x[i] * y + z[i] + carry, where
 * carry is the high word produced by the previous index (or the incoming
 * carry for index 0). Returns the high word produced by the last index.
 */
word word8_madd3(ref word[8] z, const ref word[8] x, word y, word carry)
{
    version(D_InlineAsm_X86_64) {
        word* _x = cast(word*)x.ptr;
        word* _z = z.ptr;
        word* _carry = &carry;
        // Results are built in a local buffer and copied into z after
        // the asm statement.
        word[8] ret;
        word* _z1 = ret.ptr;
        asm pure nothrow @nogc {
            mov R8, _x;
            mov RSI, _z;
            mov R10, y;
            mov RDI, _z1;
            mov RDX, _carry;
            mov RCX, [RDX];         // RCX holds the running carry

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+8];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+8], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+16];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+16], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+24];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+24], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+32];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+32], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+40];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+40], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+48];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov RCX, RDX;
            mov [RDI+48], RAX;
            add R8, 8;

            mov RAX, [R8];
            mov RBX, R10;
            mul RBX;
            add RAX, [RSI+56];
            adc RDX, 0;
            add RAX, RCX;
            adc RDX, 0;
            mov carry, RDX;
            mov [RDI+56], RAX;
        }
        z[0 .. 8] = ret[0..8];
        return carry;
    } else {
        z[0] = word_madd3(x[0], y, z[0], &carry);
        z[1] = word_madd3(x[1], y, z[1], &carry);
        z[2] = word_madd3(x[2], y, z[2], &carry);
        z[3] = word_madd3(x[3], y, z[3], &carry);
        z[4] = word_madd3(x[4], y, z[4], &carry);
        z[5] = word_madd3(x[5], y, z[5], &carry);
        z[6] = word_madd3(x[6], y, z[6], &carry);
        z[7] = word_madd3(x[7], y, z[7], &carry);
        return carry;
    }
}

/*
 * Multiply-Add Accumulator
 *
 * Adds the double-width product a * b to the three-word accumulator
 * (*w2, *w1, *w0), where w0 is the least significant word.
 */
void word3_muladd(word* w2, word* w1, word* w0, word a, word b)
{
    version (D_InlineAsm_X86_64) {

        word* _b = &b;
        word* _a = &a;
        asm pure nothrow @nogc {
            mov R13, w0;
            mov R14, w1;
            mov R15, w2;
            mov R8, _a;
            mov R9, _b;
            mov RAX, [R8];
            mov RBX, [R9];
            mul RBX;            // RDX:RAX = a * b

            add [R13], RAX;
            adc [R14], RDX;
            adc [R15], 0;

        }
    } else {
        word carry = *w0;
        *w0 = word_madd2(a, b, &carry);
        *w1 += carry;
        *w2 += (*w1 < carry) ? 1 : 0;
    }
}

/*
 * Multiply-Add Accumulator, doubled product
 *
 * Adds 2 * a * b to the three-word accumulator (*w2, *w1, *w0), where w0
 * is the least significant word.
 */
void word3_muladd_2(word* w2, word* w1, word* w0, word a, word b)
{
    version(D_InlineAsm_X86_64) {
        word* _a = &a;
        word* _b = &b;

        asm pure nothrow @nogc {
            mov R13, w0;
            mov R14, w1;
            mov R15, w2;
            mov R8, _a;
            mov R9, _b;

            mov RAX, [R8];
            mov RBX, [R9];
            mul RBX;            // RDX:RAX = a * b

            // Accumulate the product twice rather than doubling it.
            add [R13], RAX;
            adc [R14], RDX;
            adc [R15], 0;

            add [R13], RAX;
            adc [R14], RDX;
            adc [R15], 0;
        }
    }
    else {
        // a:b becomes the low:high words of a * b.
        word carry = 0;
        a = word_madd2(a, b, &carry);
        b = carry;

        // Double the two-word product; `top` catches the bit shifted out.
        word top = (b >> (BOTAN_MP_WORD_BITS-1));
        b <<= 1;
        b |= (a >> (BOTAN_MP_WORD_BITS-1));
        a <<= 1;

        carry = 0;
        *w0 = word_add(*w0, a, &carry);
        *w1 = word_add(*w1, b, &carry);
        *w2 = word_add(*w2, top, &carry);
    }
}