1 /**
2 * Monty operations
3 * 
4 * Copyright:
5 * (C) 1999-2010,2014 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *      2006 Luca Piccarreta
8 *
9 * License:
10 * Botan is released under the Simplified BSD License (see LICENSE.md)
11 */
12 module botan_math.mp_monty;
13 
14 import botan_math.mp_word;
15 import botan_math.mp_bigint;
16 import botan_math.mp_comba;
17 import botan_math.mp_karatsuba;
18 
19 /*
20 * Multiplication Algorithm Dispatcher
21 */
22 void bigint_mul(word* z, size_t z_size, word* workspace,
23 	in word* x, size_t x_size, size_t x_sw,
24 	in word* y, size_t y_size, size_t y_sw)
25 {
26 	if (x_sw == 1)
27 	{
28 		bigint_linmul3(z, y, y_sw, x[0]);
29 	}
30 	else if (y_sw == 1)
31 	{
32 		bigint_linmul3(z, x, x_sw, y[0]);
33 	}
34 	else if (x_sw <= 4 && x_size >= 4 &&
35 		y_sw <= 4 && y_size >= 4 && z_size >= 8)
36 	{
37 		bigint_comba_mul4(*cast(word[8]*) z, *cast(word[4]*) x, *cast(word[4]*) y);
38 	}
39 	else if (x_sw <= 6 && x_size >= 6 &&
40 		y_sw <= 6 && y_size >= 6 && z_size >= 12)
41 	{
42 		bigint_comba_mul6(*cast(word[12]*) z, *cast(word[6]*) x, *cast(word[6]*) y);
43 	}
44 	else if (x_sw <= 8 && x_size >= 8 &&
45 		y_sw <= 8 && y_size >= 8 && z_size >= 16)
46 	{
47 		bigint_comba_mul8(*cast(word[16]*) z, *cast(word[8]*) x, *cast(word[8]*) y);
48 	}
49 	else if (x_sw <= 9 && x_size >= 9 &&
50 		y_sw <= 9 && y_size >= 9 && z_size >= 18)
51 	{
52 		bigint_comba_mul9(*cast(word[18]*) z, *cast(word[9]*) x, *cast(word[9]*) y);
53 	}
54 	else if (x_sw <= 16 && x_size >= 16 &&
55 		y_sw <= 16 && y_size >= 16 && z_size >= 32)
56 	{
57 		bigint_comba_mul16(*cast(word[32]*) z, *cast(word[16]*) x, *cast(word[16]*) y);
58 	}
59 	else if (x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
60 		y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
61 		!workspace)
62 	{
63 		bigint_simple_mul(z, x, x_sw, y, y_sw);
64 	}
65 	else
66 	{
67 		const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
68 		
69 		if (N)
70 			karatsuba_mul(z, x, y, N, workspace);
71 		else
72 			bigint_simple_mul(z, x, x_sw, y, y_sw);
73 	}
74 }
75 
76 /*
77 * Squaring Algorithm Dispatcher
78 */
79 void bigint_sqr(word* z, size_t z_size, word* workspace,
80 	in word* x, size_t x_size, size_t x_sw)
81 {
82 	if (x_sw == 1)
83 	{
84 		bigint_linmul3(z, x, x_sw, x[0]);
85 	}
86 	else if (x_sw <= 4 && x_size >= 4 && z_size >= 8)
87 	{
88 		bigint_comba_sqr4(*cast(word[8]*) z, *cast(word[4]*) x);
89 	}
90 	else if (x_sw <= 6 && x_size >= 6 && z_size >= 12)
91 	{
92 		bigint_comba_sqr6(*cast(word[12]*) z, *cast(word[6]*) x);
93 	}
94 	else if (x_sw <= 8 && x_size >= 8 && z_size >= 16)
95 	{
96 		bigint_comba_sqr8(*cast(word[16]*) z, *cast(word[8]*) x);
97 	}
98 	else if (x_sw <= 9 && x_size >= 9 && z_size >= 18)
99 	{
100 		bigint_comba_sqr9(*cast(word[18]*) z, *cast(word[9]*) x);
101 	}
102 	else if (x_sw <= 16 && x_size >= 16 && z_size >= 32)
103 	{
104 		bigint_comba_sqr16(*cast(word[32]*) z, *cast(word[16]*) x);
105 	}
106 	else if (x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
107 	{
108 		bigint_simple_sqr(z, x, x_sw);
109 	}
110 	else
111 	{
112 		const size_t N = karatsuba_size(z_size, x_size, x_sw);
113 		
114 		if (N)
115 			karatsuba_sqr(z, x, N, workspace);
116 		else
117 			bigint_simple_sqr(z, x, x_sw);
118 	}
119 }
120 
121 /*
122 * Montgomery Multiplication
123 */
124 void bigint_monty_mul(word* z, size_t z_size,
125 	in word* x, size_t x_size, size_t x_sw,
126 	in word* y, size_t y_size, size_t y_sw,
127 	in word* p, size_t p_size, word p_dash,
128 	word* ws)
129 {
130 	bigint_mul( z, z_size, ws,
131 		x, x_size, x_sw,
132 		y, y_size, y_sw);
133 	
134 	bigint_monty_redc(z, p, p_size, p_dash, ws);
135 }
136 
137 
138 /*
139 * Montgomery Squaring
140 */
141 void bigint_monty_sqr(word* z, size_t z_size,
142 	in word* x, size_t x_size, size_t x_sw,
143 	in word* p, size_t p_size, word p_dash,
144 	word* ws)
145 {
146 	bigint_sqr(z, z_size, ws, x, x_size, x_sw);
147 	bigint_monty_redc(z, p, p_size, p_dash, ws);
148 }
149 
150 
151 
152 /**
153 * Montgomery Reduction
154 * Params:
155 *  z = integer to reduce, of size exactly 2*(p_size+1).
156               Output is in the first p_size+1 words, higher
157               words are set to zero.
158 *  p = modulus
159 *  p_size = size of p
160 *  p_dash = Montgomery value
161 *  ws = workspace array of at least 2*(p_size+1) words
162 */
163 void bigint_monty_redc(word* z, in word* p, size_t p_size, word p_dash, word* ws)
164 {
165 	const size_t z_size = 2*(p_size+1);
166 	const size_t blocks_of_8 = p_size - (p_size % 8);
167 	
168 	foreach (size_t i; 0 .. p_size)
169 	{
170 		word* z_i = z + i;
171 		
172 		word y = (*z_i) * p_dash;
173 		
174 		/*
175         bigint_linmul3(ws, p, p_size, y);
176         bigint_add2(z_i, z_size - i, ws, p_size+1);
177         */
178 		
179 		word carry = 0;
180 		
181 		for (size_t j = 0; j < blocks_of_8; j += 8)
182 			carry = word8_madd3(*cast(word[8]*) (z_i + j), *cast(word[8]*) (p + j), y, carry);
183 
184 		for (size_t j = blocks_of_8; j < p_size; j++) {
185 			version(D_InlineAsm_X86_64) {
186 				word* _x = (cast(word*)p) + j;
187 				word* _z = z_i + j;
188 				size_t word_size = word.sizeof;
189 				asm pure nothrow @nogc {
190 					mov R8, _x;
191 					mov R9, _z;
192 					mov RCX, carry;
193 					
194 					mov RAX, [R8];
195 					mov RBX, y;
196 					mul RBX;
197 					add RAX, [R9];
198 					adc RDX, 0;
199 					add RAX, RCX;
200 					adc RDX, 0;
201 					mov carry, RDX;
202 					mov [R9], RAX;
203 				}
204 			} else 
205 				z_i[j] = word_madd3(p[j], y, z_i[j], &carry);
206 		}
207 		word z_sum = z_i[p_size] + carry;
208 		carry = (z_sum < z_i[p_size]);
209 		z_i[p_size] = z_sum;
210 		
211 		for (size_t j = p_size + 1; carry && j != z_size - i; ++j)
212 		{
213 			++z_i[j];
214 			carry = !z_i[j];
215 		}
216 	}
217 	
218 	word borrow = 0;
219 	foreach (size_t i; 0 .. p_size)
220 		ws[i] = word_sub(z[p_size + i], p[i], &borrow);
221 	
222 	ws[p_size] = word_sub(z[p_size+p_size], 0, &borrow);
223 	
224 	copyMem(ws + p_size + 1, z + p_size, p_size + 1);
225 	
226 	copyMem(z, ws + borrow*(p_size+1), p_size + 1);
227 	clearMem(z + p_size + 1, z_size - p_size - 1);
228 }