/**
* Comba Multiplication / Squaring
*
* Copyright:
* (C) 1999-2010,2014 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*      2006 Luca Piccarreta
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan_math.mp_comba;

import botan_math.mp_word;

/*
* Conventions shared by every routine in this module:
*
*  - For N-word inputs the routine writes all 2*N words of z.
*  - Result word z[k] is built from the partial products whose operand
*    indices sum to k (x[i]*y[j] with i + j == k).
*  - w0, w1 and w2 form a three-word accumulator. After a result word has
*    been stored the variable that held it is cleared and becomes the top
*    word of the next column, so the order in which &w0/&w1/&w2 are passed
*    rotates from one column to the next.
*  - word_madd2, word3_muladd and word3_muladd_2 are imported from
*    botan_math.mp_word.
*    NOTE(review): word3_muladd is assumed to add a*b into the accumulator
*    (first pointer = top word, last pointer = bottom word) and
*    word3_muladd_2 to add 2*a*b -- confirm against mp_word.
*/

/*
* Comba 4x4 Squaring
*
* Params:
*  z = receives the 8-word result
*  x = 4-word operand to be squared
*/
void bigint_comba_sqr4(ref word[8] z, const ref word[4] x)
{
    word w2 = 0, w1 = 0, w0 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
    z[ 0] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
    z[ 1] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
    word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
    z[ 2] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
    z[ 3] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
    word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
    z[ 4] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
    z[ 5] = w2; w2 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
    z[ 6] = w0;
    z[ 7] = w1;
}

/*
* Comba 4x4 Multiplication
*
* Params:
*  z = receives the 8-word result
*  x = first 4-word operand
*  y = second 4-word operand
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!4; otherwise the portable code below is used.
*/
void bigint_comba_mul4(ref word[8] z, const ref word[4] x, const ref word[4] y)
{
    version(D_InlineAsm_X86_64) {
        import botan_math.x86_64.mp_comba_mul;
        mixin(mp_bigint_comba_mul!4);
    }
    else
    {
        word w2 = 0, w1 = 0, w0 = 0;
        word carry;

        // The three helpers perform the same multiply-accumulate step and
        // differ only in which variable plays which role. The suffix lists
        // the variables from top word to bottom word.
        // NOTE(review): word_madd2 is assumed to return the low word of
        // a*b + carry and to leave the high word in carry.

        // top = w2, middle = w1, bottom = w0
        void muladd_210(size_t i, size_t j) {
            carry = w0;
            w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w1 += carry;
            w2 += (w1 < carry) ? 1 : 0;
        }

        // top = w0, middle = w2, bottom = w1
        void muladd_021(size_t i, size_t j) {
            carry = w1;
            w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w2 += carry;
            w0 += (w2 < carry) ? 1 : 0;
        }

        // top = w1, middle = w0, bottom = w2
        void muladd_102(size_t i, size_t j) {
            carry = w2;
            w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w0 += carry;
            w1 += (w0 < carry) ? 1 : 0;
        }

        muladd_210(0, 0); //1
        z[ 0] = w0; w0 = 0;

        muladd_021(0, 1); //2
        muladd_021(1, 0); //3
        z[ 1] = w1; w1 = 0;

        muladd_102(0, 2); //4
        muladd_102(1, 1); //5
        muladd_102(2, 0); //6
        z[ 2] = w2; w2 = 0;

        muladd_210(0, 3); //7
        muladd_210(1, 2); //8
        muladd_210(2, 1); //9
        muladd_210(3, 0); //10
        z[ 3] = w0; w0 = 0;

        muladd_021(1, 3); //11
        muladd_021(2, 2); //12
        muladd_021(3, 1); //13
        z[ 4] = w1; w1 = 0;

        muladd_102(2, 3); //14
        muladd_102(3, 2); //15
        z[ 5] = w2; w2 = 0;

        muladd_210(3, 3); //16
        z[ 6] = w0;
        z[ 7] = w1;
    }
}

/*
* Comba 6x6 Squaring
*
* Params:
*  z = receives the 12-word result
*  x = 6-word operand to be squared
*/
void bigint_comba_sqr6(ref word[12] z, const ref word[6] x)
{
    word w2 = 0, w1 = 0, w0 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
    z[ 0] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
    z[ 1] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
    word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
    z[ 2] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
    z[ 3] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
    word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
    z[ 4] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
    z[ 5] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
    word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
    z[ 6] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]);
    z[ 7] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]);
    word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]);
    z[ 8] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
    z[ 9] = w0; w0 = 0;

    word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]);
    z[10] = w1;
    z[11] = w2;
}

/*
* Comba 6x6 Multiplication
*
* Params:
*  z = receives the 12-word result
*  x = first 6-word operand
*  y = second 6-word operand
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!6; otherwise the portable code below is used.
*/
void bigint_comba_mul6(ref word[12] z, const ref word[6] x, const ref word[6] y)
{
    version(D_InlineAsm_X86_64) {
        import botan_math.x86_64.mp_comba_mul;
        mixin(mp_bigint_comba_mul!6);
    }
    else
    {
        word w2 = 0, w1 = 0, w0 = 0;
        word carry;

        // Same multiply-accumulate step with the accumulator roles rotated;
        // the suffix lists the variables from top word to bottom word.
        // NOTE(review): word_madd2 is assumed to return the low word of
        // a*b + carry and to leave the high word in carry.

        // top = w2, middle = w1, bottom = w0
        void muladd_210(size_t i, size_t j) {
            carry = w0;
            w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w1 += carry;
            w2 += (w1 < carry) ? 1 : 0;
        }

        // top = w0, middle = w2, bottom = w1
        void muladd_021(size_t i, size_t j) {
            carry = w1;
            w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w2 += carry;
            w0 += (w2 < carry) ? 1 : 0;
        }

        // top = w1, middle = w0, bottom = w2
        void muladd_102(size_t i, size_t j) {
            carry = w2;
            w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w0 += carry;
            w1 += (w0 < carry) ? 1 : 0;
        }

        muladd_210(0, 0); //1
        z[ 0] = w0; w0 = 0;

        muladd_021(0, 1); //2
        muladd_021(1, 0); //3
        z[ 1] = w1; w1 = 0;

        muladd_102(0, 2); //4
        muladd_102(1, 1); //5
        muladd_102(2, 0); //6
        z[ 2] = w2; w2 = 0;

        muladd_210(0, 3); //7
        muladd_210(1, 2); //8
        muladd_210(2, 1); //9
        muladd_210(3, 0); //10
        z[ 3] = w0; w0 = 0;

        muladd_021(0, 4); //11
        muladd_021(1, 3); //12
        muladd_021(2, 2); //13
        muladd_021(3, 1); //14
        muladd_021(4, 0); //15
        z[ 4] = w1; w1 = 0;

        muladd_102(0, 5); //16
        muladd_102(1, 4); //17
        muladd_102(2, 3); //18
        muladd_102(3, 2); //19
        muladd_102(4, 1); //20
        muladd_102(5, 0); //21
        z[ 5] = w2; w2 = 0;

        muladd_210(1, 5); //22
        muladd_210(2, 4); //23
        muladd_210(3, 3); //24
        muladd_210(4, 2); //25
        muladd_210(5, 1); //26
        z[ 6] = w0; w0 = 0;

        muladd_021(2, 5); //27
        muladd_021(3, 4); //28
        muladd_021(4, 3); //29
        muladd_021(5, 2); //30
        z[ 7] = w1; w1 = 0;

        muladd_102(3, 5); //31
        muladd_102(4, 4); //32
        muladd_102(5, 3); //33
        z[ 8] = w2; w2 = 0;

        muladd_210(4, 5); //34
        muladd_210(5, 4); //35
        z[ 9] = w0; w0 = 0;

        muladd_021(5, 5); //36
        z[10] = w1;
        z[11] = w2;
    }
}

/*
* Comba 8x8 Squaring
*
* Params:
*  z = receives the 16-word result
*  x = 8-word operand to be squared
*/
void bigint_comba_sqr8(ref word[16] z, const ref word[8] x)
{
    word w2 = 0, w1 = 0, w0 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
    z[ 0] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
    z[ 1] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
    word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
    z[ 2] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
    z[ 3] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
    word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
    z[ 4] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
    z[ 5] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
    word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
    z[ 6] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]);
    z[ 7] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]);
    word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]);
    z[ 8] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
    z[ 9] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]);
    word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]);
    z[10] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]);
    z[11] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
    word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
    z[12] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]);
    z[13] = w1; w1 = 0;

    word3_muladd(&w1, &w0, &w2, x[ 7], x[ 7]);
    z[14] = w2;
    z[15] = w0;
}

/*
* Comba 8x8 Multiplication
*
* Params:
*  z = receives the 16-word result
*  x = first 8-word operand
*  y = second 8-word operand
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!8; otherwise the portable code below is used.
*/
void bigint_comba_mul8(ref word[16] z, const ref word[8] x, const ref word[8] y)
{
    version(D_InlineAsm_X86_64) {
        import botan_math.x86_64.mp_comba_mul;
        mixin(mp_bigint_comba_mul!8);
    }
    else
    {
        word w2 = 0, w1 = 0, w0 = 0;
        // Must be `word`, not size_t: its address is handed to word_madd2 and
        // it is mixed with the word-typed accumulator, exactly as in the 4x4
        // and 6x6 routines.
        word carry;

        // Same multiply-accumulate step with the accumulator roles rotated;
        // the suffix lists the variables from top word to bottom word.
        // NOTE(review): word_madd2 is assumed to return the low word of
        // a*b + carry and to leave the high word in carry.

        // top = w0, middle = w2, bottom = w1
        void muladd_021(size_t i, size_t j) {
            carry = w1;
            w1 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w2 += carry;
            w0 += (w2 < carry) ? 1 : 0;
        }

        // top = w1, middle = w0, bottom = w2
        void muladd_102(size_t i, size_t j) {
            carry = w2;
            w2 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w0 += carry;
            w1 += (w0 < carry) ? 1 : 0;
        }

        // top = w2, middle = w1, bottom = w0
        void muladd_210(size_t i, size_t j) {
            carry = w0;
            w0 = word_madd2(x.ptr[i], y.ptr[j], &carry);
            w1 += carry;
            w2 += (w1 < carry) ? 1 : 0;
        }

        muladd_210(0, 0); //1
        z[ 0] = w0; w0 = 0;

        muladd_021(0, 1); //2
        muladd_021(1, 0); //3
        z[ 1] = w1; w1 = 0;

        muladd_102(0, 2); //4
        muladd_102(1, 1); //5
        muladd_102(2, 0); //6
        z[ 2] = w2; w2 = 0;

        muladd_210(0, 3); //7
        muladd_210(1, 2); //8
        muladd_210(2, 1); //9
        muladd_210(3, 0); //10
        z[ 3] = w0; w0 = 0;

        muladd_021(0, 4); //11
        muladd_021(1, 3); //12
        muladd_021(2, 2); //13
        muladd_021(3, 1); //14
        muladd_021(4, 0); //15
        z[ 4] = w1; w1 = 0;

        muladd_102(0, 5); //16
        muladd_102(1, 4); //17
        muladd_102(2, 3); //18
        muladd_102(3, 2); //19
        muladd_102(4, 1); //20
        muladd_102(5, 0); //21
        z[ 5] = w2; w2 = 0;

        muladd_210(0, 6); //22
        muladd_210(1, 5); //23
        muladd_210(2, 4); //24
        muladd_210(3, 3); //25
        muladd_210(4, 2); //26
        muladd_210(5, 1); //27
        muladd_210(6, 0); //28
        z[ 6] = w0; w0 = 0;

        muladd_021(0, 7); //29
        muladd_021(1, 6); //30
        muladd_021(2, 5); //31
        muladd_021(3, 4); //32
        muladd_021(4, 3); //33
        muladd_021(5, 2); //34
        muladd_021(6, 1); //35
        muladd_021(7, 0); //36
        z[ 7] = w1; w1 = 0;

        muladd_102(1, 7); //37
        muladd_102(2, 6); //38
        muladd_102(3, 5); //39
        muladd_102(4, 4); //40
        muladd_102(5, 3); //41
        muladd_102(6, 2); //42
        muladd_102(7, 1); //43
        z[ 8] = w2; w2 = 0;

        muladd_210(2, 7); //44
        muladd_210(3, 6); //45
        muladd_210(4, 5); //46
        muladd_210(5, 4); //47
        muladd_210(6, 3); //48
        muladd_210(7, 2); //49
        z[ 9] = w0; w0 = 0;

        muladd_021(3, 7); //50
        muladd_021(4, 6); //51
        muladd_021(5, 5); //52
        muladd_021(6, 4); //53
        muladd_021(7, 3); //54
        z[10] = w1; w1 = 0;

        muladd_102(4, 7); //55
        muladd_102(5, 6); //56
        muladd_102(6, 5); //57
        muladd_102(7, 4); //58
        z[11] = w2; w2 = 0;

        muladd_210(5, 7); //59
        muladd_210(6, 6); //60
        muladd_210(7, 5); //61
        z[12] = w0; w0 = 0;

        muladd_021(6, 7); //62
        muladd_021(7, 6); //63
        z[13] = w1; w1 = 0;

        muladd_102(7, 7); //64
        z[14] = w2;
        z[15] = w0;
    }
}

/*
* Comba 9x9 Squaring
*
* Params:
*  z = receives the 18-word result
*  x = 9-word operand to be squared
*/
void bigint_comba_sqr9(ref word[18] z, const ref word[9] x)
{
    word w2 = 0, w1 = 0, w0 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
    z[ 0] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
    z[ 1] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
    word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
    z[ 2] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
    z[ 3] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
    word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
    z[ 4] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
    z[ 5] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
    word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
    z[ 6] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]);
    z[ 7] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]);
    word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]);
    z[ 8] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
    z[ 9] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]);
    word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]);
    z[10] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]);
    word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]);
    z[11] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
    word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
    word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
    z[12] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]);
    word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]);
    z[13] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]);
    word3_muladd(&w1, &w0, &w2, x[ 7], x[ 7]);
    z[14] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
    z[15] = w0; w0 = 0;

    word3_muladd(&w0, &w2, &w1, x[ 8], x[ 8]);
    z[16] = w1;
    z[17] = w2;
}

/*
* Comba 9x9 Multiplication
*
* Params:
*  z = receives the 18-word result
*  x = first 9-word operand
*  y = second 9-word operand
*
* When D_InlineAsm_X86_64 is defined the body is supplied by the string
* mixin mp_bigint_comba_mul!9; otherwise the portable code below is used.
*/
void bigint_comba_mul9(ref word[18] z, const ref word[9] x, const ref word[9] y)
{
    version(D_InlineAsm_X86_64) {
        import botan_math.x86_64.mp_comba_mul;
        mixin(mp_bigint_comba_mul!9);
    } else {
        word w2 = 0, w1 = 0, w0 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); //1
        z[ 0] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); //2
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); //3
        z[ 1] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); //4
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); //5
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); //6
        z[ 2] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); //7
        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); //8
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); //9
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); //10
        z[ 3] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); //11
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); //12
        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); //13
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); //14
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); //15
        z[ 4] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); //16
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); //17
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); //18
        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); //19
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); //20
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); //21
        z[ 5] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); //22
        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); //23
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); //24
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); //25
        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); //26
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); //27
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); //28
        z[ 6] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); //29
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); //30
        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); //31
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); //32
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); //33
        word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); //34
        word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); //35
        word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); //36
        z[ 7] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); //37
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); //38
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); //39
        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); //40
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); //41
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); //42
        word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); //43
        word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); //44
        word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); //45
        z[ 8] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); //46
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); //47
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); //48
        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); //49
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); //50
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); //51
        word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); //52
        word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); //53
        z[ 9] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); //54
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); //55
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); //56
        word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); //57
        word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); //58
        word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); //59
        word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); //60
        z[10] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); //61
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); //62
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); //63
        word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); //64
        word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); //65
        word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); //66
        z[11] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); //67
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); //68
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); //69
        word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); //70
        word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); //71
        z[12] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); //72
        word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); //73
        word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); //74
        word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); //75
        z[13] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); //76
        word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); //77
        word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); //78
        z[14] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); //79
        word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); //80
        z[15] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); //81
        z[16] = w1;
        z[17] = w2;
    }
}

/*
* Comba 16x16 Squaring
*
* Params:
*  z = receives the 32-word result
*  x = 16-word operand to be squared
*/
void bigint_comba_sqr16(ref word[32] z, const ref word[16] x)
{
    word w2 = 0, w1 = 0, w0 = 0;

    word3_muladd(&w2, &w1, &w0, x[ 0], x[ 0]);
    z[ 0] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 1]);
    z[ 1] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 2]);
    word3_muladd(&w1, &w0, &w2, x[ 1], x[ 1]);
    z[ 2] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 3]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 2]);
    z[ 3] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 4]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 3]);
    word3_muladd(&w0, &w2, &w1, x[ 2], x[ 2]);
    z[ 4] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 5]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 4]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 3]);
    z[ 5] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 5]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 4]);
    word3_muladd(&w2, &w1, &w0, x[ 3], x[ 3]);
    z[ 6] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 6]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 5]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 4]);
    z[ 7] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[ 8]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 6]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 5]);
    word3_muladd(&w1, &w0, &w2, x[ 4], x[ 4]);
    z[ 8] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[ 9]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[ 8]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[ 7]);
    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 6]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 5]);
    z[ 9] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[10]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[ 9]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[ 8]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[ 7]);
    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 6]);
    word3_muladd(&w0, &w2, &w1, x[ 5], x[ 5]);
    z[10] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[11]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[10]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[ 9]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[ 8]);
    word3_muladd_2(&w1, &w0, &w2, x[ 4], x[ 7]);
    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 6]);
    z[11] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[12]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[11]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[10]);
    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[ 9]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[ 8]);
    word3_muladd_2(&w2, &w1, &w0, x[ 5], x[ 7]);
    word3_muladd(&w2, &w1, &w0, x[ 6], x[ 6]);
    z[12] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 0], x[13]);
    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[12]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[11]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[10]);
    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[ 9]);
    word3_muladd_2(&w0, &w2, &w1, x[ 5], x[ 8]);
    word3_muladd_2(&w0, &w2, &w1, x[ 6], x[ 7]);
    z[13] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 0], x[14]);
    word3_muladd_2(&w1, &w0, &w2, x[ 1], x[13]);
    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[12]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[11]);
    word3_muladd_2(&w1, &w0, &w2, x[ 4], x[10]);
    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[ 9]);
    word3_muladd_2(&w1, &w0, &w2, x[ 6], x[ 8]);
    word3_muladd(&w1, &w0, &w2, x[ 7], x[ 7]);
    z[14] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 0], x[15]);
    word3_muladd_2(&w2, &w1, &w0, x[ 1], x[14]);
    word3_muladd_2(&w2, &w1, &w0, x[ 2], x[13]);
    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[12]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[11]);
    word3_muladd_2(&w2, &w1, &w0, x[ 5], x[10]);
    word3_muladd_2(&w2, &w1, &w0, x[ 6], x[ 9]);
    word3_muladd_2(&w2, &w1, &w0, x[ 7], x[ 8]);
    z[15] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 1], x[15]);
    word3_muladd_2(&w0, &w2, &w1, x[ 2], x[14]);
    word3_muladd_2(&w0, &w2, &w1, x[ 3], x[13]);
    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[12]);
    word3_muladd_2(&w0, &w2, &w1, x[ 5], x[11]);
    word3_muladd_2(&w0, &w2, &w1, x[ 6], x[10]);
    word3_muladd_2(&w0, &w2, &w1, x[ 7], x[ 9]);
    word3_muladd(&w0, &w2, &w1, x[ 8], x[ 8]);
    z[16] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 2], x[15]);
    word3_muladd_2(&w1, &w0, &w2, x[ 3], x[14]);
    word3_muladd_2(&w1, &w0, &w2, x[ 4], x[13]);
    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[12]);
    word3_muladd_2(&w1, &w0, &w2, x[ 6], x[11]);
    word3_muladd_2(&w1, &w0, &w2, x[ 7], x[10]);
    word3_muladd_2(&w1, &w0, &w2, x[ 8], x[ 9]);
    z[17] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 3], x[15]);
    word3_muladd_2(&w2, &w1, &w0, x[ 4], x[14]);
    word3_muladd_2(&w2, &w1, &w0, x[ 5], x[13]);
    word3_muladd_2(&w2, &w1, &w0, x[ 6], x[12]);
    word3_muladd_2(&w2, &w1, &w0, x[ 7], x[11]);
    word3_muladd_2(&w2, &w1, &w0, x[ 8], x[10]);
    word3_muladd(&w2, &w1, &w0, x[ 9], x[ 9]);
    z[18] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 4], x[15]);
    word3_muladd_2(&w0, &w2, &w1, x[ 5], x[14]);
    word3_muladd_2(&w0, &w2, &w1, x[ 6], x[13]);
    word3_muladd_2(&w0, &w2, &w1, x[ 7], x[12]);
    word3_muladd_2(&w0, &w2, &w1, x[ 8], x[11]);
    word3_muladd_2(&w0, &w2, &w1, x[ 9], x[10]);
    z[19] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 5], x[15]);
    word3_muladd_2(&w1, &w0, &w2, x[ 6], x[14]);
    word3_muladd_2(&w1, &w0, &w2, x[ 7], x[13]);
    word3_muladd_2(&w1, &w0, &w2, x[ 8], x[12]);
    word3_muladd_2(&w1, &w0, &w2, x[ 9], x[11]);
    word3_muladd(&w1, &w0, &w2, x[10], x[10]);
    z[20] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 6], x[15]);
    word3_muladd_2(&w2, &w1, &w0, x[ 7], x[14]);
    word3_muladd_2(&w2, &w1, &w0, x[ 8], x[13]);
    word3_muladd_2(&w2, &w1, &w0, x[ 9], x[12]);
    word3_muladd_2(&w2, &w1, &w0, x[10], x[11]);
    z[21] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[ 7], x[15]);
    word3_muladd_2(&w0, &w2, &w1, x[ 8], x[14]);
    word3_muladd_2(&w0, &w2, &w1, x[ 9], x[13]);
    word3_muladd_2(&w0, &w2, &w1, x[10], x[12]);
    word3_muladd(&w0, &w2, &w1, x[11], x[11]);
    z[22] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[ 8], x[15]);
    word3_muladd_2(&w1, &w0, &w2, x[ 9], x[14]);
    word3_muladd_2(&w1, &w0, &w2, x[10], x[13]);
    word3_muladd_2(&w1, &w0, &w2, x[11], x[12]);
    z[23] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[ 9], x[15]);
    word3_muladd_2(&w2, &w1, &w0, x[10], x[14]);
    word3_muladd_2(&w2, &w1, &w0, x[11], x[13]);
    word3_muladd(&w2, &w1, &w0, x[12], x[12]);
    z[24] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[10], x[15]);
    word3_muladd_2(&w0, &w2, &w1, x[11], x[14]);
    word3_muladd_2(&w0, &w2, &w1, x[12], x[13]);
    z[25] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[11], x[15]);
    word3_muladd_2(&w1, &w0, &w2, x[12], x[14]);
    word3_muladd(&w1, &w0, &w2, x[13], x[13]);
    z[26] = w2; w2 = 0;

    word3_muladd_2(&w2, &w1, &w0, x[12], x[15]);
    word3_muladd_2(&w2, &w1, &w0, x[13], x[14]);
    z[27] = w0; w0 = 0;

    word3_muladd_2(&w0, &w2, &w1, x[13], x[15]);
    word3_muladd(&w0, &w2, &w1, x[14], x[14]);
    z[28] = w1; w1 = 0;

    word3_muladd_2(&w1, &w0, &w2, x[14], x[15]);
    z[29] = w2; w2 = 0;

    word3_muladd(&w2, &w1, &w0, x[15], x[15]);
    z[30] = w0;
    z[31] = w1;
}

/*
* Comba 16x16 Multiplication
*/
void bigint_comba_mul16(ref word[32] z, const ref word[16] x, const ref word[16] y)
{
    version(D_InlineAsm_X86_64) {
        import botan_math.x86_64.mp_comba_mul;
        mixin(mp_bigint_comba_mul!16);
    }
    else {
        word w2 = 0, w1 = 0, w0 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 0]); //1
        z[ 0] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 1]); //2
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 0]); //3
        z[ 1] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 2]); //4
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 1]); //5
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 0]); //6
        z[ 2] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 3]); //7
        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 2]); //8
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 1]); //9
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 0]); //10
        z[ 3] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 4]); //11
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 3]); //12
        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 2]); //13
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 1]); //14
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 0]); //15
        z[ 4] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 5]); //16
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 4]); //17
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 3]); //18
        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 2]); //19
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 1]); //20
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 0]); //21
        z[ 5] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 6]); //22
        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 5]); //23
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 4]); //24
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 3]); //25
        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 2]); //26
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 1]); //27
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 0]); //28
        z[ 6] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[ 7]); //29
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 6]); //30
        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 5]); //31
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 4]); //32
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 3]); //33
        word3_muladd(&w0, &w2, &w1, x[ 5], y[ 2]); //34
        word3_muladd(&w0, &w2, &w1, x[ 6], y[ 1]); //35
        word3_muladd(&w0, &w2, &w1, x[ 7], y[ 0]); //36
        z[ 7] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[ 8]); //37
        word3_muladd(&w1, &w0, &w2, x[ 1], y[ 7]); //38
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 6]); //39
        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 5]); //40
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 4]); //41
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 3]); //42
        word3_muladd(&w1, &w0, &w2, x[ 6], y[ 2]); //43
        word3_muladd(&w1, &w0, &w2, x[ 7], y[ 1]); //44
        word3_muladd(&w1, &w0, &w2, x[ 8], y[ 0]); //45
        z[ 8] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[ 9]); //46
        word3_muladd(&w2, &w1, &w0, x[ 1], y[ 8]); //47
        word3_muladd(&w2, &w1, &w0, x[ 2], y[ 7]); //48
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 6]); //49
        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 5]); //50
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 4]); //51
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 3]); //52
        word3_muladd(&w2, &w1, &w0, x[ 7], y[ 2]); //53
        word3_muladd(&w2, &w1, &w0, x[ 8], y[ 1]); //54
        word3_muladd(&w2, &w1, &w0, x[ 9], y[ 0]); //55
        z[ 9] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[10]); //56
        word3_muladd(&w0, &w2, &w1, x[ 1], y[ 9]); //57
        word3_muladd(&w0, &w2, &w1, x[ 2], y[ 8]); //58
        word3_muladd(&w0, &w2, &w1, x[ 3], y[ 7]); //59
        word3_muladd(&w0, &w2, &w1, x[ 4], y[ 6]); //60
        word3_muladd(&w0, &w2, &w1, x[ 5], y[ 5]); //61
        word3_muladd(&w0, &w2, &w1, x[ 6], y[ 4]); //62
        word3_muladd(&w0, &w2, &w1, x[ 7], y[ 3]); //63
        word3_muladd(&w0, &w2, &w1, x[ 8], y[ 2]); //64
        word3_muladd(&w0, &w2, &w1, x[ 9], y[ 1]); //65
        word3_muladd(&w0, &w2, &w1, x[10], y[ 0]); //66
        z[10] = w1; w1 = 0;

        word3_muladd(&w1, &w0, &w2, x[ 0], y[11]); //67
        word3_muladd(&w1, &w0, &w2, x[ 1], y[10]); //68
        word3_muladd(&w1, &w0, &w2, x[ 2], y[ 9]); //69
        word3_muladd(&w1, &w0, &w2, x[ 3], y[ 8]); //70
        word3_muladd(&w1, &w0, &w2, x[ 4], y[ 7]); //71
        word3_muladd(&w1, &w0, &w2, x[ 5], y[ 6]); //72
        word3_muladd(&w1, &w0, &w2, x[ 6], y[ 5]); //73
        word3_muladd(&w1, &w0, &w2, x[ 7], y[ 4]); //74
        word3_muladd(&w1, &w0, &w2, x[ 8], y[ 3]); //75
        word3_muladd(&w1, &w0, &w2, x[ 9], y[ 2]); //76
        word3_muladd(&w1, &w0, &w2, x[10], y[ 1]); //77
        word3_muladd(&w1, &w0, &w2, x[11], y[ 0]); //78
        z[11] = w2; w2 = 0;

        word3_muladd(&w2, &w1, &w0, x[ 0], y[12]); //79
        word3_muladd(&w2, &w1, &w0, x[ 1], y[11]); //80
        word3_muladd(&w2, &w1, &w0, x[ 2], y[10]); //81
        word3_muladd(&w2, &w1, &w0, x[ 3], y[ 9]); //82
        word3_muladd(&w2, &w1, &w0, x[ 4], y[ 8]); //83
        word3_muladd(&w2, &w1, &w0, x[ 5], y[ 7]); //84
        word3_muladd(&w2, &w1, &w0, x[ 6], y[ 6]); //85
        word3_muladd(&w2, &w1, &w0, x[ 7], y[ 5]); //86
        word3_muladd(&w2, &w1, &w0, x[ 8], y[ 4]); //87
        word3_muladd(&w2, &w1, &w0, x[ 9], y[ 3]); //88
        word3_muladd(&w2, &w1, &w0, x[10], y[ 2]); //89
        word3_muladd(&w2, &w1, &w0, x[11], y[ 1]); //90
        word3_muladd(&w2, &w1, &w0, x[12], y[ 0]); //91
        z[12] = w0; w0 = 0;

        word3_muladd(&w0, &w2, &w1, x[ 0], y[13]); //92
        word3_muladd(&w0, &w2, &w1, x[ 1], y[12]); //93
        word3_muladd(&w0, &w2, &w1, x[ 2], y[11]); //94
        word3_muladd(&w0, &w2, &w1, x[ 3], y[10]); //95
word3_muladd(&w0, &w2, &w1, x[ 4], y[ 9]); //96 1248 word3_muladd(&w0, &w2, &w1, x[ 5], y[ 8]); //97 1249 word3_muladd(&w0, &w2, &w1, x[ 6], y[ 7]); //98 1250 word3_muladd(&w0, &w2, &w1, x[ 7], y[ 6]); //99 1251 word3_muladd(&w0, &w2, &w1, x[ 8], y[ 5]); //100 1252 word3_muladd(&w0, &w2, &w1, x[ 9], y[ 4]); //101 1253 word3_muladd(&w0, &w2, &w1, x[10], y[ 3]); //102 1254 word3_muladd(&w0, &w2, &w1, x[11], y[ 2]); //103 1255 word3_muladd(&w0, &w2, &w1, x[12], y[ 1]); //104 1256 word3_muladd(&w0, &w2, &w1, x[13], y[ 0]); //105 1257 z[13] = w1; w1 = 0; 1258 1259 word3_muladd(&w1, &w0, &w2, x[ 0], y[14]); //106 1260 word3_muladd(&w1, &w0, &w2, x[ 1], y[13]); //107 1261 word3_muladd(&w1, &w0, &w2, x[ 2], y[12]); //108 1262 word3_muladd(&w1, &w0, &w2, x[ 3], y[11]); //109 1263 word3_muladd(&w1, &w0, &w2, x[ 4], y[10]); //110 1264 word3_muladd(&w1, &w0, &w2, x[ 5], y[ 9]); //111 1265 word3_muladd(&w1, &w0, &w2, x[ 6], y[ 8]); //112 1266 word3_muladd(&w1, &w0, &w2, x[ 7], y[ 7]); //113 1267 word3_muladd(&w1, &w0, &w2, x[ 8], y[ 6]); //114 1268 word3_muladd(&w1, &w0, &w2, x[ 9], y[ 5]); //115 1269 word3_muladd(&w1, &w0, &w2, x[10], y[ 4]); //116 1270 word3_muladd(&w1, &w0, &w2, x[11], y[ 3]); //117 1271 word3_muladd(&w1, &w0, &w2, x[12], y[ 2]); //118 1272 word3_muladd(&w1, &w0, &w2, x[13], y[ 1]); //119 1273 word3_muladd(&w1, &w0, &w2, x[14], y[ 0]); //120 1274 z[14] = w2; w2 = 0; 1275 1276 word3_muladd(&w2, &w1, &w0, x[ 0], y[15]); //121 1277 word3_muladd(&w2, &w1, &w0, x[ 1], y[14]); //122 1278 word3_muladd(&w2, &w1, &w0, x[ 2], y[13]); //123 1279 word3_muladd(&w2, &w1, &w0, x[ 3], y[12]); //124 1280 word3_muladd(&w2, &w1, &w0, x[ 4], y[11]); //125 1281 word3_muladd(&w2, &w1, &w0, x[ 5], y[10]); //126 1282 word3_muladd(&w2, &w1, &w0, x[ 6], y[ 9]); //127 1283 word3_muladd(&w2, &w1, &w0, x[ 7], y[ 8]); //128 1284 word3_muladd(&w2, &w1, &w0, x[ 8], y[ 7]); //129 1285 word3_muladd(&w2, &w1, &w0, x[ 9], y[ 6]); //130 1286 word3_muladd(&w2, &w1, &w0, x[10], y[ 5]); //131 1287 
word3_muladd(&w2, &w1, &w0, x[11], y[ 4]); //132 1288 word3_muladd(&w2, &w1, &w0, x[12], y[ 3]); //133 1289 word3_muladd(&w2, &w1, &w0, x[13], y[ 2]); //134 1290 word3_muladd(&w2, &w1, &w0, x[14], y[ 1]); //135 1291 word3_muladd(&w2, &w1, &w0, x[15], y[ 0]); //136 1292 z[15] = w0; w0 = 0; 1293 1294 word3_muladd(&w0, &w2, &w1, x[ 1], y[15]); //137 1295 word3_muladd(&w0, &w2, &w1, x[ 2], y[14]); //138 1296 word3_muladd(&w0, &w2, &w1, x[ 3], y[13]); //139 1297 word3_muladd(&w0, &w2, &w1, x[ 4], y[12]); //140 1298 word3_muladd(&w0, &w2, &w1, x[ 5], y[11]); //141 1299 word3_muladd(&w0, &w2, &w1, x[ 6], y[10]); //142 1300 word3_muladd(&w0, &w2, &w1, x[ 7], y[ 9]); //143 1301 word3_muladd(&w0, &w2, &w1, x[ 8], y[ 8]); //144 1302 word3_muladd(&w0, &w2, &w1, x[ 9], y[ 7]); //145 1303 word3_muladd(&w0, &w2, &w1, x[10], y[ 6]); //146 1304 word3_muladd(&w0, &w2, &w1, x[11], y[ 5]); //147 1305 word3_muladd(&w0, &w2, &w1, x[12], y[ 4]); //148 1306 word3_muladd(&w0, &w2, &w1, x[13], y[ 3]); //149 1307 word3_muladd(&w0, &w2, &w1, x[14], y[ 2]); //150 1308 word3_muladd(&w0, &w2, &w1, x[15], y[ 1]); //151 1309 z[16] = w1; w1 = 0; 1310 1311 word3_muladd(&w1, &w0, &w2, x[ 2], y[15]); //152 1312 word3_muladd(&w1, &w0, &w2, x[ 3], y[14]); //153 1313 word3_muladd(&w1, &w0, &w2, x[ 4], y[13]); //154 1314 word3_muladd(&w1, &w0, &w2, x[ 5], y[12]); //155 1315 word3_muladd(&w1, &w0, &w2, x[ 6], y[11]); //156 1316 word3_muladd(&w1, &w0, &w2, x[ 7], y[10]); //157 1317 word3_muladd(&w1, &w0, &w2, x[ 8], y[ 9]); //158 1318 word3_muladd(&w1, &w0, &w2, x[ 9], y[ 8]); //159 1319 word3_muladd(&w1, &w0, &w2, x[10], y[ 7]); //160 1320 word3_muladd(&w1, &w0, &w2, x[11], y[ 6]); //161 1321 word3_muladd(&w1, &w0, &w2, x[12], y[ 5]); //162 1322 word3_muladd(&w1, &w0, &w2, x[13], y[ 4]); //163 1323 word3_muladd(&w1, &w0, &w2, x[14], y[ 3]); //164 1324 word3_muladd(&w1, &w0, &w2, x[15], y[ 2]); //165 1325 z[17] = w2; w2 = 0; 1326 1327 word3_muladd(&w2, &w1, &w0, x[ 3], y[15]); //166 1328 word3_muladd(&w2, 
&w1, &w0, x[ 4], y[14]); //167 1329 word3_muladd(&w2, &w1, &w0, x[ 5], y[13]); //168 1330 word3_muladd(&w2, &w1, &w0, x[ 6], y[12]); //169 1331 word3_muladd(&w2, &w1, &w0, x[ 7], y[11]); //170 1332 word3_muladd(&w2, &w1, &w0, x[ 8], y[10]); //171 1333 word3_muladd(&w2, &w1, &w0, x[ 9], y[ 9]); //172 1334 word3_muladd(&w2, &w1, &w0, x[10], y[ 8]); //173 1335 word3_muladd(&w2, &w1, &w0, x[11], y[ 7]); //174 1336 word3_muladd(&w2, &w1, &w0, x[12], y[ 6]); //175 1337 word3_muladd(&w2, &w1, &w0, x[13], y[ 5]); //176 1338 word3_muladd(&w2, &w1, &w0, x[14], y[ 4]); //177 1339 word3_muladd(&w2, &w1, &w0, x[15], y[ 3]); //178 1340 z[18] = w0; w0 = 0; 1341 1342 word3_muladd(&w0, &w2, &w1, x[ 4], y[15]); //179 1343 word3_muladd(&w0, &w2, &w1, x[ 5], y[14]); //180 1344 word3_muladd(&w0, &w2, &w1, x[ 6], y[13]); //181 1345 word3_muladd(&w0, &w2, &w1, x[ 7], y[12]); //182 1346 word3_muladd(&w0, &w2, &w1, x[ 8], y[11]); //183 1347 word3_muladd(&w0, &w2, &w1, x[ 9], y[10]); //184 1348 word3_muladd(&w0, &w2, &w1, x[10], y[ 9]); //185 1349 word3_muladd(&w0, &w2, &w1, x[11], y[ 8]); //186 1350 word3_muladd(&w0, &w2, &w1, x[12], y[ 7]); //187 1351 word3_muladd(&w0, &w2, &w1, x[13], y[ 6]); //188 1352 word3_muladd(&w0, &w2, &w1, x[14], y[ 5]); //189 1353 word3_muladd(&w0, &w2, &w1, x[15], y[ 4]); //190 1354 z[19] = w1; w1 = 0; 1355 1356 word3_muladd(&w1, &w0, &w2, x[ 5], y[15]); //191 1357 word3_muladd(&w1, &w0, &w2, x[ 6], y[14]); //192 1358 word3_muladd(&w1, &w0, &w2, x[ 7], y[13]); //193 1359 word3_muladd(&w1, &w0, &w2, x[ 8], y[12]); //194 1360 word3_muladd(&w1, &w0, &w2, x[ 9], y[11]); //195 1361 word3_muladd(&w1, &w0, &w2, x[10], y[10]); //196 1362 word3_muladd(&w1, &w0, &w2, x[11], y[ 9]); //197 1363 word3_muladd(&w1, &w0, &w2, x[12], y[ 8]); //198 1364 word3_muladd(&w1, &w0, &w2, x[13], y[ 7]); //199 1365 word3_muladd(&w1, &w0, &w2, x[14], y[ 6]); //200 1366 word3_muladd(&w1, &w0, &w2, x[15], y[ 5]); //201 1367 z[20] = w2; w2 = 0; 1368 1369 word3_muladd(&w2, &w1, &w0, x[ 6], 
y[15]); //202 1370 word3_muladd(&w2, &w1, &w0, x[ 7], y[14]); //203 1371 word3_muladd(&w2, &w1, &w0, x[ 8], y[13]); //204 1372 word3_muladd(&w2, &w1, &w0, x[ 9], y[12]); //205 1373 word3_muladd(&w2, &w1, &w0, x[10], y[11]); //206 1374 word3_muladd(&w2, &w1, &w0, x[11], y[10]); //207 1375 word3_muladd(&w2, &w1, &w0, x[12], y[ 9]); //208 1376 word3_muladd(&w2, &w1, &w0, x[13], y[ 8]); //209 1377 word3_muladd(&w2, &w1, &w0, x[14], y[ 7]); //210 1378 word3_muladd(&w2, &w1, &w0, x[15], y[ 6]); //211 1379 z[21] = w0; w0 = 0; 1380 1381 word3_muladd(&w0, &w2, &w1, x[ 7], y[15]); //212 1382 word3_muladd(&w0, &w2, &w1, x[ 8], y[14]); //213 1383 word3_muladd(&w0, &w2, &w1, x[ 9], y[13]); //214 1384 word3_muladd(&w0, &w2, &w1, x[10], y[12]); //215 1385 word3_muladd(&w0, &w2, &w1, x[11], y[11]); //216 1386 word3_muladd(&w0, &w2, &w1, x[12], y[10]); //217 1387 word3_muladd(&w0, &w2, &w1, x[13], y[ 9]); //218 1388 word3_muladd(&w0, &w2, &w1, x[14], y[ 8]); //219 1389 word3_muladd(&w0, &w2, &w1, x[15], y[ 7]); //220 1390 z[22] = w1; w1 = 0; 1391 1392 word3_muladd(&w1, &w0, &w2, x[ 8], y[15]); //221 1393 word3_muladd(&w1, &w0, &w2, x[ 9], y[14]); //222 1394 word3_muladd(&w1, &w0, &w2, x[10], y[13]); //223 1395 word3_muladd(&w1, &w0, &w2, x[11], y[12]); //224 1396 word3_muladd(&w1, &w0, &w2, x[12], y[11]); //225 1397 word3_muladd(&w1, &w0, &w2, x[13], y[10]); //226 1398 word3_muladd(&w1, &w0, &w2, x[14], y[ 9]); //227 1399 word3_muladd(&w1, &w0, &w2, x[15], y[ 8]); //228 1400 z[23] = w2; w2 = 0; 1401 1402 word3_muladd(&w2, &w1, &w0, x[ 9], y[15]); //229 1403 word3_muladd(&w2, &w1, &w0, x[10], y[14]); //230 1404 word3_muladd(&w2, &w1, &w0, x[11], y[13]); //231 1405 word3_muladd(&w2, &w1, &w0, x[12], y[12]); //232 1406 word3_muladd(&w2, &w1, &w0, x[13], y[11]); //233 1407 word3_muladd(&w2, &w1, &w0, x[14], y[10]); //234 1408 word3_muladd(&w2, &w1, &w0, x[15], y[ 9]); //235 1409 z[24] = w0; w0 = 0; 1410 1411 word3_muladd(&w0, &w2, &w1, x[10], y[15]); //236 1412 word3_muladd(&w0, &w2, 
&w1, x[11], y[14]); //237 1413 word3_muladd(&w0, &w2, &w1, x[12], y[13]); //238 1414 word3_muladd(&w0, &w2, &w1, x[13], y[12]); //239 1415 word3_muladd(&w0, &w2, &w1, x[14], y[11]); //240 1416 word3_muladd(&w0, &w2, &w1, x[15], y[10]); //241 1417 z[25] = w1; w1 = 0; 1418 1419 word3_muladd(&w1, &w0, &w2, x[11], y[15]); //242 1420 word3_muladd(&w1, &w0, &w2, x[12], y[14]); //243 1421 word3_muladd(&w1, &w0, &w2, x[13], y[13]); //244 1422 word3_muladd(&w1, &w0, &w2, x[14], y[12]); //245 1423 word3_muladd(&w1, &w0, &w2, x[15], y[11]); //246 1424 z[26] = w2; w2 = 0; 1425 1426 word3_muladd(&w2, &w1, &w0, x[12], y[15]); //247 1427 word3_muladd(&w2, &w1, &w0, x[13], y[14]); //248 1428 word3_muladd(&w2, &w1, &w0, x[14], y[13]); //249 1429 word3_muladd(&w2, &w1, &w0, x[15], y[12]); //250 1430 z[27] = w0; w0 = 0; 1431 1432 word3_muladd(&w0, &w2, &w1, x[13], y[15]); //251 1433 word3_muladd(&w0, &w2, &w1, x[14], y[14]); //252 1434 word3_muladd(&w0, &w2, &w1, x[15], y[13]); //253 1435 z[28] = w1; w1 = 0; 1436 1437 word3_muladd(&w1, &w0, &w2, x[14], y[15]); //254 1438 word3_muladd(&w1, &w0, &w2, x[15], y[14]); //255 1439 z[29] = w2; w2 = 0; 1440 1441 word3_muladd(&w2, &w1, &w0, x[15], y[15]); //256 1442 z[30] = w0; 1443 z[31] = w1; 1444 } 1445 }