< prev index next >

src/cpu/x86/vm/stubRoutines_x86.cpp

Print this page




  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"

  30 
  31 // Implementation of the platform-specific part of StubRoutines - for
  32 // a description of how to extend it, see the stubRoutines.hpp file.
  33 
  34 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
  35 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
  36 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  37 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
  38 
  39 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  40 {
  41   /* The fields in this structure are arranged so that they can be
  42    * picked up two at a time with 128-bit loads.
  43    *
  44    * Because of flipped bit order for this CRC polynomials
  45    * the constant for X**N is left-shifted by 1.  This is because
  46    * a 64 x 64 polynomial multiply produces a 127-bit result
  47    * but the highest term is always aligned to bit 0 in the container.
  48    * Pre-shifting by one fixes this, at the cost of potentially making
  49    * the 32-bit constant no longer fit in a 32-bit container (thus the


 113     0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 114     0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 115     0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 116     0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 117     0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 118     0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 119     0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 120     0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 121     0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 122     0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 123     0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 124     0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 125     0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 126     0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 127     0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 128     0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 129     0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 130     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 131     0x2d02ef8dUL
 132 };










































































































  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"
  30 #include "crc32c.h"
  31 
  32 // Implementation of the platform-specific part of StubRoutines - for
  33 // a description of how to extend it, see the stubRoutines.hpp file.
  34 
  35 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
  36 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
  37 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  38 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
  39 
  40 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  41 {
  42   /* The fields in this structure are arranged so that they can be
  43    * picked up two at a time with 128-bit loads.
  44    *
  45    * Because of flipped bit order for this CRC polynomials
  46    * the constant for X**N is left-shifted by 1.  This is because
  47    * a 64 x 64 polynomial multiply produces a 127-bit result
  48    * but the highest term is always aligned to bit 0 in the container.
  49    * Pre-shifting by one fixes this, at the cost of potentially making
  50    * the 32-bit constant no longer fit in a 32-bit container (thus the


 114     0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 115     0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 116     0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 117     0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 118     0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 119     0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 120     0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 121     0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 122     0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 123     0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 124     0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 125     0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 126     0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 127     0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 128     0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 129     0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 130     0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 131     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 132     0x2d02ef8dUL
 133 };
 134 
 135 #define D 32
 136 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)
 137 
 138 #define TILL_CYCLE 31
 139 uint32_t _crc32c_pow_2k_table[TILL_CYCLE]; // because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0]
 140 
 141 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
 142 // Listing 1: Multiplication of normalized polynomials
 143 // "a" and "b" occupy D least significant bits.
 144 uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
 145   uint32_t product = 0;
 146   uint32_t b_pow_x_table[D + 1]; // b_pow_x_table[k] = (b * x**k) mod P
 147   b_pow_x_table[0] = b;
 148   for (int k = 0; k < D; ++k) {
 149     // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result.
 150     if ((a & (uint64_t)(1 << (D - 1 - k))) != 0) product ^= b_pow_x_table[k];
 151 
 152     // Compute b_pow_x_table[k+1] = (b ** x**(k+1)) mod P.
 153     if (b_pow_x_table[k] & 1) {
 154       // If degree of (b_pow_x_table[k] * x) is D, then
 155       // degree of (b_pow_x_table[k] * x - P) is less than D.
 156       b_pow_x_table[k + 1] = (b_pow_x_table[k] >> 1) ^ P;
 157     }
 158     else {
 159       b_pow_x_table[k + 1] = b_pow_x_table[k] >> 1;
 160     }
 161   }
 162   return product;
 163 }
 164 #undef D
 165 #undef P
 166 
 167 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
 168 void crc32c_init_pow_2k(void) {
 169   // _crc32c_pow_2k_table(0) =
 170   // x^(2^k) mod P(x) = x mod P(x) = x
 171   // Since we are operating on a reflected values
 172   // x = 10b, reflect(x) = 0x40000000
 173   _crc32c_pow_2k_table[0] = 0x40000000;
 174 
 175   for (int k = 1; k < TILL_CYCLE; k++) {
 176     // _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x)
 177     uint32_t tmp = _crc32c_pow_2k_table[k - 1];
 178     _crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp);
 179   }
 180 }
 181 
 182 // x^N mod P(x)
 183 uint32_t crc32c_f_pow_n(uint32_t n) {
 184   //            result = 1 (polynomial)
 185   uint32_t one, result = 0x80000000, i = 0;
 186 
 187   while (one = (n & 1), (n == 1 || n - one > 0)) {
 188     if (one) {
 189       result = crc32c_multiply(result, _crc32c_pow_2k_table[i]);
 190     }
 191     n >>= 1;
 192     i++;
 193   }
 194 
 195   return result;
 196 }
 197 
 198 juint *StubRoutines::x86::_crc32c_table;
 199 
 200 void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
 201 
 202   static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
 203 
 204   crc32c_init_pow_2k();
 205 
 206   pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8);      // 8N * 8 = 64N
 207   pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2);  // 128N
 208 
 209   pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
 210   pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
 211 
 212   pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
 213   pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
 214             crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
 215 
 216   if (is_pclmulqdq_table_supported) {
 217     _crc32c_table = pow_n;
 218   } else {
 219     static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
 220 
 221     for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
 222       static juint X_CONST = pow_n[j];
 223       for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
 224       // S. Gueron / Information Processing Letters 112 (2012) 184
 225       // Algorithm 3: Generating a carry-less multiplication lookup table.
 226       // Input: A 32-bit constant, X_CONST.
 227       // Output: A table of 256 entries, each one is a 64-bit quadword,
 228       // that can be used for computing "byte" * X_CONST, for a given byte.
 229         pclmulqdq_table[j * 256 + i] =
 230           ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
 231           ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
 232           ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
 233       }
 234     }
 235     _crc32c_table = (juint*)pclmulqdq_table;
 236   }
 237 }
< prev index next >