10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "runtime/deoptimization.hpp"
27 #include "runtime/frame.inline.hpp"
28 #include "runtime/stubRoutines.hpp"
29 #include "runtime/thread.inline.hpp"
30
31 // Implementation of the platform-specific part of StubRoutines - for
32 // a description of how to extend it, see the stubRoutines.hpp file.
33
34 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
35 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
36 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
37 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
38
39 uint64_t StubRoutines::x86::_crc_by128_masks[] =
40 {
41 /* The fields in this structure are arranged so that they can be
42 * picked up two at a time with 128-bit loads.
43 *
44 * Because of flipped bit order for this CRC polynomials
45 * the constant for X**N is left-shifted by 1. This is because
46 * a 64 x 64 polynomial multiply produces a 127-bit result
47 * but the highest term is always aligned to bit 0 in the container.
48 * Pre-shifting by one fixes this, at the cost of potentially making
49 * the 32-bit constant no longer fit in a 32-bit container (thus the
113 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
114 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
115 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
116 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
117 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
118 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
119 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
120 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
121 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
122 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
123 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
124 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
125 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
126 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
127 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
128 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
129 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
130 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
131 0x2d02ef8dUL
132 };
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "runtime/deoptimization.hpp"
27 #include "runtime/frame.inline.hpp"
28 #include "runtime/stubRoutines.hpp"
29 #include "runtime/thread.inline.hpp"
30 #include "crc32c.h"
31
32 // Implementation of the platform-specific part of StubRoutines - for
33 // a description of how to extend it, see the stubRoutines.hpp file.
34
35 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
36 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
37 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
38 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
39
40 uint64_t StubRoutines::x86::_crc_by128_masks[] =
41 {
42 /* The fields in this structure are arranged so that they can be
43 * picked up two at a time with 128-bit loads.
44 *
45 * Because of flipped bit order for this CRC polynomials
46 * the constant for X**N is left-shifted by 1. This is because
47 * a 64 x 64 polynomial multiply produces a 127-bit result
48 * but the highest term is always aligned to bit 0 in the container.
49 * Pre-shifting by one fixes this, at the cost of potentially making
50 * the 32-bit constant no longer fit in a 32-bit container (thus the
114 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
115 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
116 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
117 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
118 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
119 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
120 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
121 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
122 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
123 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
124 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
125 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
126 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
127 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
128 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
129 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
130 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
131 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
132 0x2d02ef8dUL
133 };
134
135 #define D 32
136 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)
137
138 #define TILL_CYCLE 31
139 uint32_t _crc32c_pow_2k_table[TILL_CYCLE]; // because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0]
140
141 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
142 // Listing 1: Multiplication of normalized polynomials
143 // "a" and "b" occupy D least significant bits.
144 uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
145 uint32_t product = 0;
146 uint32_t b_pow_x_table[D + 1]; // b_pow_x_table[k] = (b * x**k) mod P
147 b_pow_x_table[0] = b;
148 for (int k = 0; k < D; ++k) {
149 // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result.
150 if ((a & (uint64_t)(1 << (D - 1 - k))) != 0) product ^= b_pow_x_table[k];
151
152 // Compute b_pow_x_table[k+1] = (b ** x**(k+1)) mod P.
153 if (b_pow_x_table[k] & 1) {
154 // If degree of (b_pow_x_table[k] * x) is D, then
155 // degree of (b_pow_x_table[k] * x - P) is less than D.
156 b_pow_x_table[k + 1] = (b_pow_x_table[k] >> 1) ^ P;
157 }
158 else {
159 b_pow_x_table[k + 1] = b_pow_x_table[k] >> 1;
160 }
161 }
162 return product;
163 }
164 #undef D
165 #undef P
166
167 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
168 void crc32c_init_pow_2k(void) {
169 // _crc32c_pow_2k_table(0) =
170 // x^(2^k) mod P(x) = x mod P(x) = x
171 // Since we are operating on a reflected values
172 // x = 10b, reflect(x) = 0x40000000
173 _crc32c_pow_2k_table[0] = 0x40000000;
174
175 for (int k = 1; k < TILL_CYCLE; k++) {
176 // _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x)
177 uint32_t tmp = _crc32c_pow_2k_table[k - 1];
178 _crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp);
179 }
180 }
181
182 // x^N mod P(x)
183 uint32_t crc32c_f_pow_n(uint32_t n) {
184 // result = 1 (polynomial)
185 uint32_t one, result = 0x80000000, i = 0;
186
187 while (one = (n & 1), (n == 1 || n - one > 0)) {
188 if (one) {
189 result = crc32c_multiply(result, _crc32c_pow_2k_table[i]);
190 }
191 n >>= 1;
192 i++;
193 }
194
195 return result;
196 }
197
198 juint *StubRoutines::x86::_crc32c_table;
199
200 void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
201
202 static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
203
204 crc32c_init_pow_2k();
205
206 pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8); // 8N * 8 = 64N
207 pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2); // 128N
208
209 pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
210 pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
211
212 pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
213 pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
214 crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
215
216 if (is_pclmulqdq_table_supported) {
217 _crc32c_table = pow_n;
218 } else {
219 static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
220
221 for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
222 static juint X_CONST = pow_n[j];
223 for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
224 // S. Gueron / Information Processing Letters 112 (2012) 184
225 // Algorithm 3: Generating a carry-less multiplication lookup table.
226 // Input: A 32-bit constant, X_CONST.
227 // Output: A table of 256 entries, each one is a 64-bit quadword,
228 // that can be used for computing "byte" * X_CONST, for a given byte.
229 pclmulqdq_table[j * 256 + i] =
230 ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
231 ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
232 ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
233 }
234 }
235 _crc32c_table = (juint*)pclmulqdq_table;
236 }
237 }
|