1 /* 2 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "runtime/deoptimization.hpp" 27 #include "runtime/frame.inline.hpp" 28 #include "runtime/stubRoutines.hpp" 29 #include "runtime/thread.inline.hpp" 30 #include "crc32c.h" 31 32 #ifdef _MSC_VER 33 #define ALIGNED_(x) __declspec(align(x)) 34 #else 35 #define ALIGNED_(x) __attribute__ ((aligned(x))) 36 #endif 37 38 // Implementation of the platform-specific part of StubRoutines - for 39 // a description of how to extend it, see the stubRoutines.hpp file. 40 41 address StubRoutines::x86::_verify_mxcsr_entry = NULL; 42 address StubRoutines::x86::_key_shuffle_mask_addr = NULL; 43 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL; 44 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; 45 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; 46 address StubRoutines::x86::_upper_word_mask_addr = NULL; 47 address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL; 48 address StubRoutines::x86::_k256_adr = NULL; 49 address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL; 50 51 //tables common for sin and cos 52 address StubRoutines::x86::_ONEHALF_adr = NULL; 53 address StubRoutines::x86::_P_2_adr = NULL; 54 address StubRoutines::x86::_SC_4_adr = NULL; 55 address StubRoutines::x86::_Ctable_adr = NULL; 56 address StubRoutines::x86::_SC_2_adr = NULL; 57 address StubRoutines::x86::_SC_3_adr = NULL; 58 address StubRoutines::x86::_SC_1_adr = NULL; 59 address StubRoutines::x86::_PI_INV_TABLE_adr = NULL; 60 address StubRoutines::x86::_PI_4_adr = NULL; 61 address StubRoutines::x86::_PI32INV_adr = NULL; 62 address StubRoutines::x86::_SIGN_MASK_adr = NULL; 63 address StubRoutines::x86::_P_1_adr = NULL; 64 address StubRoutines::x86::_P_3_adr = NULL; 65 address StubRoutines::x86::_NEG_ZERO_adr = NULL; 66 67 //tables common for sincos and tancot 68 address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL; 69 address StubRoutines::x86::_Pi4Inv_adr = NULL; 70 address StubRoutines::x86::_Pi4x3_adr = NULL; 71 address StubRoutines::x86::_Pi4x4_adr = NULL; 72 address StubRoutines::x86::_ones_adr = NULL; 73 74 uint64_t StubRoutines::x86::_crc_by128_masks[] = 75 { 76 /* The fields in this structure are arranged so that they can be 77 * picked up two at a time with 128-bit loads. 78 * 79 * Because of flipped bit order for this CRC polynomials 80 * the constant for X**N is left-shifted by 1. This is because 81 * a 64 x 64 polynomial multiply produces a 127-bit result 82 * but the highest term is always aligned to bit 0 in the container. 83 * Pre-shifting by one fixes this, at the cost of potentially making 84 * the 32-bit constant no longer fit in a 32-bit container (thus the 85 * use of uint64_t, though this is also the size used by the carry- 86 * less multiply instruction. 87 * 88 * In addition, the flipped bit order and highest-term-at-least-bit 89 * multiply changes the constants used. The 96-bit result will be 90 * aligned to the high-term end of the target 128-bit container, 91 * not the low-term end; that is, instead of a 512-bit or 576-bit fold, 92 * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold. 93 * 94 * This cause additional problems in the 128-to-64-bit reduction; see the 95 * code for details. By storing a mask in the otherwise unused half of 96 * a 128-bit constant, bits can be cleared before multiplication without 97 * storing and reloading. Note that staying on a 128-bit datapath means 98 * that some data is uselessly stored and some unused data is intersected 99 * with an irrelevant constant. 100 */ 101 102 ((uint64_t) 0xffffffffUL), /* low of K_M_64 */ 103 ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64 */ 104 ((uint64_t) 0xba8ccbe8U << 1), /* low of K_160_96 */ 105 ((uint64_t) 0x6655004fU << 1), /* high of K_160_96 */ 106 ((uint64_t) 0xaa2215eaU << 1), /* low of K_544_480 */ 107 ((uint64_t) 0xe3720acbU << 1) /* high of K_544_480 */ 108 }; 109 110 /** 111 * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h 112 */ 113 juint StubRoutines::x86::_crc_table[] = 114 { 115 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, 116 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, 117 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, 118 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, 119 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, 120 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, 121 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, 122 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, 123 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, 124 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, 125 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, 126 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, 127 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, 128 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, 129 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, 130 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, 131 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, 132 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, 133 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, 134 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, 135 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, 136 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, 137 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, 138 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, 139 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, 140 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, 141 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, 142 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, 143 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, 144 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, 145 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, 146 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, 147 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, 148 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, 149 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, 150 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, 151 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, 152 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, 153 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, 154 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, 155 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, 156 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, 157 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, 158 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, 159 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, 160 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, 161 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, 162 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, 163 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, 164 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, 165 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 166 0x2d02ef8dUL 167 }; 168 169 #define D 32 170 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41) 171 172 #define TILL_CYCLE 31 173 uint32_t _crc32c_pow_2k_table[TILL_CYCLE]; // because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0] 174 175 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8 176 // Listing 1: Multiplication of normalized polynomials 177 // "a" and "b" occupy D least significant bits. 178 uint32_t crc32c_multiply(uint32_t a, uint32_t b) { 179 uint32_t product = 0; 180 uint32_t b_pow_x_table[D + 1]; // b_pow_x_table[k] = (b * x**k) mod P 181 b_pow_x_table[0] = b; 182 for (int k = 0; k < D; ++k) { 183 // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result. 184 if ((a & (((uint32_t)1) << (D - 1 - k))) != 0) product ^= b_pow_x_table[k]; 185 186 // Compute b_pow_x_table[k+1] = (b ** x**(k+1)) mod P. 187 if (b_pow_x_table[k] & 1) { 188 // If degree of (b_pow_x_table[k] * x) is D, then 189 // degree of (b_pow_x_table[k] * x - P) is less than D. 190 b_pow_x_table[k + 1] = (b_pow_x_table[k] >> 1) ^ P; 191 } 192 else { 193 b_pow_x_table[k + 1] = b_pow_x_table[k] >> 1; 194 } 195 } 196 return product; 197 } 198 #undef D 199 #undef P 200 201 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9 202 void crc32c_init_pow_2k(void) { 203 // _crc32c_pow_2k_table(0) = 204 // x^(2^k) mod P(x) = x mod P(x) = x 205 // Since we are operating on a reflected values 206 // x = 10b, reflect(x) = 0x40000000 207 _crc32c_pow_2k_table[0] = 0x40000000; 208 209 for (int k = 1; k < TILL_CYCLE; k++) { 210 // _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x) 211 uint32_t tmp = _crc32c_pow_2k_table[k - 1]; 212 _crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp); 213 } 214 } 215 216 // x^N mod P(x) 217 uint32_t crc32c_f_pow_n(uint32_t n) { 218 // result = 1 (polynomial) 219 uint32_t one, result = 0x80000000, i = 0; 220 221 while (one = (n & 1), (n == 1 || n - one > 0)) { 222 if (one) { 223 result = crc32c_multiply(result, _crc32c_pow_2k_table[i]); 224 } 225 n >>= 1; 226 i++; 227 } 228 229 return result; 230 } 231 232 juint *StubRoutines::x86::_crc32c_table; 233 234 void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) { 235 236 static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; 237 238 crc32c_init_pow_2k(); 239 240 pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8); // 8N * 8 = 64N 241 pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2); // 128N 242 243 pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8); 244 pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2); 245 246 pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8); 247 pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] = 248 crc32c_f_pow_n(CRC32C_LOW * 8 * 2); 249 250 if (is_pclmulqdq_table_supported) { 251 _crc32c_table = pow_n; 252 } else { 253 static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256]; 254 255 for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) { 256 static juint X_CONST = pow_n[j]; 257 for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations 258 // S. Gueron / Information Processing Letters 112 (2012) 184 259 // Algorithm 3: Generating a carry-less multiplication lookup table. 260 // Input: A 32-bit constant, X_CONST. 261 // Output: A table of 256 entries, each one is a 64-bit quadword, 262 // that can be used for computing "byte" * X_CONST, for a given byte. 263 pclmulqdq_table[j * 256 + i] = 264 ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^ 265 ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^ 266 ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST); 267 } 268 } 269 _crc32c_table = (juint*)pclmulqdq_table; 270 } 271 } 272 273 ALIGNED_(64) juint StubRoutines::x86::_k256[] = 274 { 275 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 276 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 277 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, 278 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, 279 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 280 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 281 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 282 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, 283 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, 284 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, 285 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 286 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 287 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, 288 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, 289 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 290 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL 291 };