1 /* 2 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "runtime/deoptimization.hpp" 27 #include "runtime/frame.inline.hpp" 28 #include "runtime/stubRoutines.hpp" 29 #include "runtime/thread.inline.hpp" 30 31 // Implementation of the platform-specific part of StubRoutines - for 32 // a description of how to extend it, see the stubRoutines.hpp file. 33 34 address StubRoutines::x86::_verify_mxcsr_entry = NULL; 35 address StubRoutines::x86::_key_shuffle_mask_addr = NULL; 36 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; 37 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; 38 39 uint64_t StubRoutines::x86::_crc_by128_masks[] = 40 { 41 /* The fields in this structure are arranged so that they can be 42 * picked up two at a time with 128-bit loads. 43 * 44 * Because of flipped bit order for this CRC polynomials 45 * the constant for X**N is left-shifted by 1. This is because 46 * a 64 x 64 polynomial multiply produces a 127-bit result 47 * but the highest term is always aligned to bit 0 in the container. 48 * Pre-shifting by one fixes this, at the cost of potentially making 49 * the 32-bit constant no longer fit in a 32-bit container (thus the 50 * use of uint64_t, though this is also the size used by the carry- 51 * less multiply instruction. 52 * 53 * In addition, the flipped bit order and highest-term-at-least-bit 54 * multiply changes the constants used. The 96-bit result will be 55 * aligned to the high-term end of the target 128-bit container, 56 * not the low-term end; that is, instead of a 512-bit or 576-bit fold, 57 * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold. 58 * 59 * This cause additional problems in the 128-to-64-bit reduction; see the 60 * code for details. By storing a mask in the otherwise unused half of 61 * a 128-bit constant, bits can be cleared before multiplication without 62 * storing and reloading. Note that staying on a 128-bit datapath means 63 * that some data is uselessly stored and some unused data is intersected 64 * with an irrelevant constant. 65 */ 66 67 ((uint64_t) 0xffffffffUL), /* low of K_M_64 */ 68 ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64 */ 69 ((uint64_t) 0xba8ccbe8U << 1), /* low of K_160_96 */ 70 ((uint64_t) 0x6655004fU << 1), /* high of K_160_96 */ 71 ((uint64_t) 0xaa2215eaU << 1), /* low of K_544_480 */ 72 ((uint64_t) 0xe3720acbU << 1) /* high of K_544_480 */ 73 }; 74 75 /** 76 * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h 77 */ 78 juint StubRoutines::x86::_crc_table[] = 79 { 80 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, 81 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, 82 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, 83 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, 84 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, 85 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, 86 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, 87 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, 88 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, 89 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, 90 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, 91 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, 92 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, 93 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, 94 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, 95 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, 96 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, 97 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, 98 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, 99 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, 100 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, 101 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, 102 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, 103 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, 104 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, 105 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, 106 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, 107 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, 108 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, 109 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, 110 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, 111 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, 112 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, 113 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, 114 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, 115 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, 116 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, 117 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, 118 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, 119 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, 120 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, 121 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, 122 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, 123 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, 124 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, 125 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, 126 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, 127 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, 128 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, 129 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, 130 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 131 0x2d02ef8dUL 132 }; 133 134 namespace CRC32C { 135 #include "crc32c.h" 136 137 #undef CONST 138 static juint x; 139 #define CONST x 140 141 #define D 32 142 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41) 143 144 #define TILL_CYCLE 31 145 uint32_t Pow2k[TILL_CYCLE]; // because Pow2k[TILL_CYCLE == 31] == Pow2k[0] 146 147 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8 148 // Listing 1: Multiplication of normalized polynomials 149 // "a" and "b" occupy D least significant bits. 150 uint32_t Multiply(uint32_t a, uint32_t b) { 151 uint32_t product = 0; 152 uint32_t bPowX[D + 1]; // bPowX[k] = (b * x**k) mod P 153 bPowX[0] = b; 154 for (int k = 0; k < D; ++k) { 155 // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result. 156 if ((a & (uint64_t)(1 << (D - 1 - k))) != 0) product ^= bPowX[k]; 157 158 // Compute bPowX[k+1] = (b ** x**(k+1)) mod P. 159 if (bPowX[k] & 1) { 160 // If degree of (bPowX[k] * x) is D, then 161 // degree of (bPowX[k] * x - P) is less than D. 162 bPowX[k + 1] = (bPowX[k] >> 1) ^ P; 163 } 164 else { 165 bPowX[k + 1] = bPowX[k] >> 1; 166 } 167 } 168 return product; 169 } 170 171 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9 172 void InitPow2k(void) { 173 // Pow2k(0) = 174 // x^(2^k) mod P(x) = x mod P(x) = x 175 // Since we are operating on a reflected values 176 // x = 10b, reflect(x) = 0x40000000 177 Pow2k[0] = 0x40000000; 178 179 for (int k = 1; k < TILL_CYCLE; k++) { 180 // Pow2k(k+1) = Pow2k(k-1)^2 mod P(x) 181 uint32_t tmp = Pow2k[k - 1]; 182 Pow2k[k] = Multiply(tmp, tmp); 183 } 184 } 185 186 // x^N mod P(x) 187 uint32_t FPowN(uint32_t n) { 188 // result = 1 (polynomial) 189 uint32_t one, result = 0x80000000, i = 0; 190 191 while (one = (n & 1), (n == 1 || n - one > 0)) { 192 if (one) { 193 result = Multiply(result, Pow2k[i]); 194 } 195 n >>= 1; 196 i++; 197 } 198 199 return result; 200 } 201 } 202 203 juint *StubRoutines::x86::_crc32c_table; 204 205 void StubRoutines::x86::GenerateCRC32CTable(bool IsPclmulqdqSupported) { 206 using namespace CRC32C; 207 208 static juint PowN[NUM_PRECOMPUTED_CONSTANTS]; 209 210 InitPow2k(); 211 212 PowN[0] = FPowN(HIGH * 8); // 8N * 8 = 64N 213 PowN[1] = FPowN(HIGH * 8 * 2); // 128N 214 215 PowN[2] = FPowN(MIDDLE * 8); 216 PowN[3] = FPowN(MIDDLE * 8 * 2); 217 218 PowN[4] = FPowN(LOW * 8); 219 PowN[NUM_PRECOMPUTED_CONSTANTS - 1] = 220 FPowN(LOW * 8 * 2); 221 222 if (IsPclmulqdqSupported) { 223 _crc32c_table = PowN; 224 } else { 225 static julong PCLMULQDQ[NUM_PRECOMPUTED_CONSTANTS * 256]; 226 227 for (int j = 0; j < NUM_PRECOMPUTED_CONSTANTS; j++) { 228 CONST = PowN[j]; 229 for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations 230 // S. Gueron / Information Processing Letters 112 (2012) 184 231 // Algorithm 3: Generating a carry-less multiplication lookup table. 232 // Input: A 32-bit constant, CONST. 233 // Output: A table of 256 entries, each one is a 64-bit quadword, 234 // that can be used for computing "byte" * CONST, for a given byte. 235 PCLMULQDQ[j * 256 + i] = 236 ((i & 1) * CONST) ^ ((i & 2) * CONST) ^ ((i & 4) * CONST) ^ 237 ((i & 8) * CONST) ^ ((i & 16) * CONST) ^ ((i & 32) * CONST) ^ 238 ((i & 64) * CONST) ^ ((i & 128) * CONST); 239 } 240 } 241 _crc32c_table = (juint*)PCLMULQDQ; 242 } 243 }