module botan_math.x86_64.mp_comba_mul;
import std.conv;
import std.array;

/**
 * Builds, as a string, the D source of a fully unrolled ROWS x ROWS word
 * Comba (column-wise) multiplication written in x86-64 inline assembly.
 *
 * The returned text is meant to be mixed in at a site where `x`, `y` and `z`
 * (anything exposing `.ptr`, plus `.length` for `z`), the type `word` and the
 * function `clearMem` are in scope. The generated code clears `z` and then
 * stores 2 * ROWS result words through `_z`.
 *
 * Register usage in the generated assembly:
 *   RSI / R9      - cursors walking `x` upwards and `y` downwards in a column
 *   RDI           - cursor over the output words of `z`
 *   RAX, RBX, RDX - operands and 128-bit result of `mul`
 *   R13, R14, R15 - three-word column accumulator (w0, w1, w2); the roles
 *                   rotate after every column, R13/R14/R15 being only the
 *                   initial w0/w1/w2 assignment
 *
 * Params:
 *   ROWS = number of words in each operand (compile-time integer constant)
 *
 * Returns: the source text (local declarations followed by an `asm` block).
 */
string mp_bigint_comba_mul(alias ROWS)() {
	// Prologue/epilogue of the mixed-in snippet that wrap the asm body.
	string start = "
	auto _x = x.ptr;
	auto _y = y.ptr;
	clearMem(z.ptr, z.length);
	word* _z = z.ptr;
	asm pure nothrow @nogc {";
	string end = "\n}\n";

	string asm_x86_64;
	// Running index of emitted multiply steps; used to build unique labels.
	size_t cnt;
	// W[0] = w0 (low word of the current column), W[1] = w1, W[2] = w2.
	string[3] W = ["R13", "R14", "R15"];

	// Moves to the next column: old w1 becomes w0, old w2 becomes w1 and the
	// (already zeroed) old w0 register is recycled as the new w2.
	void shiftW_right() {
		string w0 = W[0];
		W[0] = W[1];
		W[1] = W[2];
		W[2] = w0;
	}

	//init
	asm_x86_64 ~= "
			mov RSI, _x;
			mov R9, _y;
			mov RDI, _z;
			xor R15, R15;
			xor R14, R14;
			xor R13, R13;\n";

	// Emits every multiply-accumulate step of one result column.
	//   reverse == false: lower column i, i + 1 products x[j] * y[i - j]
	//   reverse == true : upper column with i products (none when i == 0)
	void word3_muladd(int i, bool reverse = false) {
		int k = reverse ? 0 : 1;
		foreach (j; 0 .. i + k) {
			cnt++;
			// The final product of a column flushes w0 to memory.
			immutable bool isLast = (j + 1 == i + k);

			asm_x86_64 ~= "\nMUL_" ~ cnt.to!string ~ ": // " ~ i.to!string ~ " // " ~ j.to!string ~ " // " ~ reverse.to!string ~ "\n";
			if (j == 0) {
				// Reposition the cursors at the start of the column, relative
				// to where the previous column left them.
				if (i > 1) asm_x86_64 ~= "sub RSI, " ~ ((i-1)*8).to!string ~ ";\n";
				if (i > 0) asm_x86_64 ~= "add R9, " ~ (i*8).to!string ~ ";\n";
			}
			else {
				// Within a column x advances by one word while y retreats.
				asm_x86_64 ~= "add RSI, 8;\n";
				asm_x86_64 ~= "sub R9, 8;\n";
			}
			// multiply: RDX:RAX = [RSI] * [R9]
			asm_x86_64 ~= "mov RAX, [RSI]; mov RBX, [R9]; mul RBX;\n";
			// add the low accumulator word to the low product word
			asm_x86_64 ~= "add RAX, " ~ W[0] ~ ";\n";
			// fold that carry into the high product word, then into w1
			asm_x86_64 ~= "adc RDX, 0;\n";
			asm_x86_64 ~= "add " ~ W[1] ~ ", RDX;\n";
			// save multiplication result
			if (!isLast) {
				asm_x86_64 ~= "mov " ~ W[0] ~ ", RAX;\n";
			} else {
				// Column finished: write the word out and clear the register
				// so it can serve as w2 after the next rotation.
				if (i > 0) asm_x86_64 ~= "add RDI, 8;\n";
				asm_x86_64 ~= "mov [RDI], RAX;\n";
				asm_x86_64 ~= "xor " ~ W[0] ~ ", " ~ W[0] ~ ";\n";
			}
			// Propagate a carry out of w1 into w2. The flags of the `add`
			// above are gone by now, so the carry is re-detected: w1 wrapped
			// exactly when it ended up below the addend RDX. Skipped for the
			// first and the very last product; the latter also avoids a jump
			// to a label that would not exist.
			if (i > 0 && !(i == 1 && reverse)) {
				asm_x86_64 ~= "cmp " ~ W[1] ~ ", RDX;\n";
				asm_x86_64 ~= "jnb MUL_" ~ (cnt+1).to!string ~ ";\n";
				asm_x86_64 ~= "add " ~ W[2] ~ ", 1;\n";
			}
		}
	}

	// Lower half: columns 0 .. ROWS-1.
	foreach (int i; 0 .. ROWS)
	{
		word3_muladd(i);
		shiftW_right();
	}
	// Upper half: columns ROWS .. 2*ROWS-2. No rotation after the last
	// non-empty column (i == 1), whose high word therefore stays in W[1].
	foreach_reverse (int i; 0 .. ROWS)
	{
		word3_muladd(i, true);
		if (i > 1) shiftW_right();
	}

	// save the last carry
	// With ROWS == 1 there is no upper column: the rotation after column 0
	// has already moved the product's high word into W[0], so storing W[1]
	// would write zero and lose it.
	string top = (ROWS == 1) ? W[0] : W[1];
	asm_x86_64 ~= "add RDI, 8;\n";
	asm_x86_64 ~= "mov [RDI], " ~ top ~ ";\n";
	return start ~ asm_x86_64 ~ end;
}