hotspot Sdiff src/cpu/x86/vm

src/cpu/x86/vm/stubRoutines_x86.cpp

  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"
  30 #include "crc32c.h"
  31 






  32 // Implementation of the platform-specific part of StubRoutines - for
  33 // a description of how to extend it, see the stubRoutines.hpp file.
  34 
  35 address StubRoutines::x86::_verify_mxcsr_entry = NULL; // every address in this group starts out NULL
  36 address StubRoutines::x86::_key_shuffle_mask_addr = NULL; // NOTE(review): presumably assigned during stub generation - confirm
  37 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
  38 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  39 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;




  40 
  41 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  42 {
  43   /* The fields in this structure are arranged so that they can be
  44    * picked up two at a time with 128-bit loads.
  45    *
  46    * Because of the flipped bit order for this CRC polynomial,
  47    * the constant for X**N is left-shifted by 1.  This is because
  48    * a 64 x 64 polynomial multiply produces a 127-bit result
  49    * but the highest term is always aligned to bit 0 in the container.
  50    * Pre-shifting by one fixes this, at the cost of potentially making
  51    * the 32-bit constant no longer fit in a 32-bit container (thus the
  52    * use of uint64_t, though this is also the size used by the carry-
  53    * less multiply instruction).
  54    *
  55    * In addition, the flipped bit order and highest-term-at-least-bit
  56    * multiply change the constants used.  The 96-bit result will be
  57    * aligned to the high-term end of the target 128-bit container,
  58    * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
  59    * it is a 480 (=512-32) or 544 (=512+64-32) bit fold.

 219   } else {
 220     static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
 221 
 222     for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) { // fill one 256-entry sub-table per precomputed constant
 223       const juint X_CONST = pow_n[j]; // must not be 'static': a static local is initialized only on the first pass and would keep pow_n[0] for every j
 224       for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
 225       // S. Gueron / Information Processing Letters 112 (2012) 184
 226       // Algorithm 3: Generating a carry-less multiplication lookup table.
 227       // Input: A 32-bit constant, X_CONST.
 228       // Output: A table of 256 entries, each one is a 64-bit quadword,
 229       // that can be used for computing "byte" * X_CONST, for a given byte.
 230         pclmulqdq_table[j * 256 + i] =
 231           ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^ // (i & 2^k) * X_CONST is X_CONST shifted left by k when bit k of i is set, else 0
 232           ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
 233           ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST); // partial products are combined with XOR rather than +, i.e. without carries
 234       }
 235     }
 236     _crc32c_table = (juint*)pclmulqdq_table;
 237   }
 238 }

  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"
  30 #include "crc32c.h"
  31 
  32 #ifdef _MSC_VER // ALIGNED_(x): compiler-specific prefix that requests x-byte alignment for the declaration it precedes
  33 #define ALIGNED_(x) __declspec(align(x)) // spelling used when _MSC_VER is defined
  34 #else
  35 #define ALIGNED_(x) __attribute__ ((aligned(x))) // GNU-style attribute used for every other compiler
  36 #endif
  37 
  38 // Implementation of the platform-specific part of StubRoutines - for
  39 // a description of how to extend it, see the stubRoutines.hpp file.
  40 
  41 address StubRoutines::x86::_verify_mxcsr_entry = NULL; // every address in this group starts out NULL
  42 address StubRoutines::x86::_key_shuffle_mask_addr = NULL; // NOTE(review): presumably assigned during stub generation - confirm
  43 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
  44 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  45 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
  46 address StubRoutines::x86::_upper_word_mask_addr = NULL;
  47 address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
  48 address StubRoutines::x86::_k256_adr = NULL;
  49 address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
  50 
  51 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  52 {
  53   /* The fields in this structure are arranged so that they can be
  54    * picked up two at a time with 128-bit loads.
  55    *
  56    * Because of the flipped bit order for this CRC polynomial,
  57    * the constant for X**N is left-shifted by 1.  This is because
  58    * a 64 x 64 polynomial multiply produces a 127-bit result
  59    * but the highest term is always aligned to bit 0 in the container.
  60    * Pre-shifting by one fixes this, at the cost of potentially making
  61    * the 32-bit constant no longer fit in a 32-bit container (thus the
  62    * use of uint64_t, though this is also the size used by the carry-
  63    * less multiply instruction).
  64    *
  65    * In addition, the flipped bit order and highest-term-at-least-bit
  66    * multiply change the constants used.  The 96-bit result will be
  67    * aligned to the high-term end of the target 128-bit container,
  68    * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
  69    * it is a 480 (=512-32) or 544 (=512+64-32) bit fold.

 229   } else {
 230     static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
 231 
 232     for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) { // fill one 256-entry sub-table per precomputed constant
 233       const juint X_CONST = pow_n[j]; // must not be 'static': a static local is initialized only on the first pass and would keep pow_n[0] for every j
 234       for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
 235       // S. Gueron / Information Processing Letters 112 (2012) 184
 236       // Algorithm 3: Generating a carry-less multiplication lookup table.
 237       // Input: A 32-bit constant, X_CONST.
 238       // Output: A table of 256 entries, each one is a 64-bit quadword,
 239       // that can be used for computing "byte" * X_CONST, for a given byte.
 240         pclmulqdq_table[j * 256 + i] =
 241           ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^ // (i & 2^k) * X_CONST is X_CONST shifted left by k when bit k of i is set, else 0
 242           ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
 243           ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST); // partial products are combined with XOR rather than +, i.e. without carries
 244       }
 245     }
 246     _crc32c_table = (juint*)pclmulqdq_table;
 247   }
 248 }
 249 
 250 ALIGNED_(64) juint StubRoutines::x86::_k256[] = // table of 64 constants; 64-byte alignment requested via ALIGNED_
 251 { // the values match the SHA-256 round constants K[0..63] listed in FIPS 180-4
 252     0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, // K[ 0.. 3]
 253     0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, // K[ 4.. 7]
 254     0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, // K[ 8..11]
 255     0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, // K[12..15]
 256     0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, // K[16..19]
 257     0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, // K[20..23]
 258     0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, // K[24..27]
 259     0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, // K[28..31]
 260     0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, // K[32..35]
 261     0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, // K[36..39]
 262     0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, // K[40..43]
 263     0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, // K[44..47]
 264     0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, // K[48..51]
 265     0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, // K[52..55]
 266     0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, // K[56..59]
 267     0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL  // K[60..63]
 268 };

< prev index next >