1 module botan_math.x86_64.mp_comba_mul;
2 import std.conv;
3 import std.array;
4 
/**
 * Builds D source text (intended for a string mixin) containing an x86-64
 * inline-assembly Comba multiplication of two ROWS-word operands.
 *
 * The generated text refers to the following names, which must therefore be
 * in scope wherever the returned string is mixed in: `x`, `y`, `z` (each
 * exposing `.ptr`; `z` also `.length`), `clearMem` and the type `word`.
 * NOTE(review): the emitted code reads x[0 .. ROWS] and y[0 .. ROWS] and
 * stores 2*ROWS words starting at z[0] - confirm callers size the buffers
 * accordingly.
 *
 * Registers used by the emitted code: RSI (cursor into x), R9 (cursor into y),
 * RDI (cursor into z), RAX/RBX/RDX (multiply scratch) and R13/R14/R15, which
 * take turns acting as the three-word column accumulator (w0, w1, w2).
 *
 * Params:
 *   ROWS = number of words in each operand.
 *
 * Returns: the D statements plus the `asm` block, as a string.
 */
string mp_bigint_comba_mul(alias ROWS)() {
	// Prologue emitted verbatim ahead of the instructions: grab raw pointers,
	// zero the destination and open the asm block.
	string start = "
	auto _x = x.ptr;
	auto _y = y.ptr;
	clearMem(z.ptr, z.length);
	word* _z = z.ptr;
    asm pure nothrow @nogc {";
	string end = "\n}\n";

	string asm_x86_64;
	// Running label number; every emitted multiply gets its own MUL_<cnt> label.
	size_t cnt;
	// Accumulator roles: W[0] = w0 (low), W[1] = w1 (middle), W[2] = w2 (high).
	string[3] W = ["R13", "R14", "R15"];

	// After a column is stored its w0 is zero, so rotate the roles:
	// old w1 becomes w0, old w2 becomes w1, and the cleared register becomes w2.
	void shiftW_right() {
		string w0 = W[0];
		string w1 = W[1];
		string w2 = W[2];
		W[0] = w1;
		W[1] = w2;
		W[2] = w0;
	}

	// Load the three cursors and clear the accumulator registers.
	asm_x86_64 ~= "
			mov RSI, _x;
			mov R9, _y;
			mov RDI, _z;
			xor R15, R15;
			xor R14, R14;
			xor R13, R13;\n";

	// Emits every multiply-accumulate of one output column.
	//   i       - column parameter; a forward column has i + 1 products,
	//             a reverse column has i products.
	//   reverse - true for the columns of the upper half of the result.
	void word3_muladd(int i, bool reverse = false) {
		int k = reverse ? 0 : 1;
		foreach (j; 0 .. i + k) {
			cnt++;

			asm_x86_64 ~= "\nMUL_" ~ cnt.to!string ~ ": // " ~ i.to!string ~ " // " ~ j.to!string ~ " // " ~ reverse.to!string ~ "\n";
			if (j == 0) {
				// First product of the column: rewind the x cursor and move
				// the y cursor forward to the column's starting operands.
				if (i > 1) asm_x86_64 ~= "sub RSI, " ~ ((i-1)*8).to!string ~ ";\n";
				if (i > 0) asm_x86_64 ~= "add R9, " ~ (i*8).to!string ~ ";\n";
			}
			else {
				// Following products: x advances one word, y steps back one.
				asm_x86_64 ~= "add RSI, 8;\n";
				asm_x86_64 ~= "sub R9, 8;\n";
			}
			// RDX:RAX = [RSI] * [R9]
			asm_x86_64 ~= "mov RAX, [RSI]; mov RBX, [R9]; mul RBX;\n";
			// Fold w0 into the low half ...
			asm_x86_64 ~= "add RAX, " ~ W[0] ~ ";\n";
			// ... push that carry into the high half, then add it to w1.
			asm_x86_64 ~= "adc RDX, 0;\n";
			asm_x86_64 ~= "add " ~ W[1] ~ ", RDX;\n";

			// j runs over 0 .. i + k, so the last product is j == i + k - 1.
			immutable bool lastProduct = (j == i + k - 1);
			if (!lastProduct) {
				// Keep the running low word in w0 for the next product.
				asm_x86_64 ~= "mov " ~ W[0] ~ ", RAX;\n";
			} else {
				// Column finished: store it to the next z word and clear w0.
				if (i > 0) asm_x86_64 ~= "add RDI, 8;\n";
				asm_x86_64 ~= "mov [RDI], RAX;\n";
				asm_x86_64 ~= "xor " ~ W[0] ~ ", " ~ W[0] ~ ";\n";
			}

			// The flags from `add w1, RDX` may have been destroyed by the xor
			// above, so the wrap-around is re-detected with a compare: when
			// w1 < RDX the addition overflowed and w2 is incremented. The
			// branch targets the label of the next product, hence it is not
			// emitted for the very last product (reverse, i == 1), nor for
			// column 0.
			if (i > 0 && !(i == 1 && reverse)) {
				asm_x86_64 ~= "cmp " ~ W[1] ~ ", RDX;\n";
				asm_x86_64 ~= "jnb MUL_" ~ (cnt+1).to!string ~ ";\n";
				asm_x86_64 ~= "add " ~ W[2] ~ ", 1;\n";
			}
		}
	}

	// Lower half of the result: columns 0 .. ROWS-1.
	foreach (int i; 0 .. ROWS)
	{
		word3_muladd(i);
		shiftW_right();
	}
	// Upper half: columns with ROWS-1 down to 1 products (i == 0 emits nothing).
	foreach_reverse (int i; 0 .. ROWS)
	{
		word3_muladd(i, true);
		if (i > 1) shiftW_right();
	}

	// Store the top word of the product. For ROWS > 1 the final column is not
	// followed by a rotation, so its high word is still in W[1]. For ROWS == 1
	// the only column was emitted by the forward loop, which always rotates,
	// so the high word now lives in W[0]; writing W[1] there would store zero.
	string top = (ROWS > 1) ? W[1] : W[0];
	asm_x86_64 ~= "add RDI, 8;\n";
	asm_x86_64 ~= "mov [RDI], " ~ top ~ ";\n";
	return start ~ asm_x86_64 ~ end;
}