1 /*
   2  * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"
  30 #include "crc32c.h"
  31 
  32 #ifdef _MSC_VER
  33 #define ALIGNED_(x) __declspec(align(x))
  34 #else
  35 #define ALIGNED_(x) __attribute__ ((aligned(x)))
  36 #endif
  37 
  38 // Implementation of the platform-specific part of StubRoutines - for
  39 // a description of how to extend it, see the stubRoutines.hpp file.
  40 
  41 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
  42 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
  43 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
  44 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  45 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
  46 address StubRoutines::x86::_upper_word_mask_addr = NULL;
  47 address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
  48 address StubRoutines::x86::_k256_adr = NULL;
  49 address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
  50 
  51 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  52 {
  53   /* The fields in this structure are arranged so that they can be
  54    * picked up two at a time with 128-bit loads.
  55    *
  56    * Because of flipped bit order for this CRC polynomials
  57    * the constant for X**N is left-shifted by 1.  This is because
  58    * a 64 x 64 polynomial multiply produces a 127-bit result
  59    * but the highest term is always aligned to bit 0 in the container.
  60    * Pre-shifting by one fixes this, at the cost of potentially making
  61    * the 32-bit constant no longer fit in a 32-bit container (thus the
  62    * use of uint64_t, though this is also the size used by the carry-
  63    * less multiply instruction.
  64    *
  65    * In addition, the flipped bit order and highest-term-at-least-bit
  66    * multiply changes the constants used.  The 96-bit result will be
  67    * aligned to the high-term end of the target 128-bit container,
  68    * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
  69    * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
  70    *
  71    * This cause additional problems in the 128-to-64-bit reduction; see the
  72    * code for details.  By storing a mask in the otherwise unused half of
  73    * a 128-bit constant, bits can be cleared before multiplication without
  74    * storing and reloading.  Note that staying on a 128-bit datapath means
  75    * that some data is uselessly stored and some unused data is intersected
  76    * with an irrelevant constant.
  77    */
  78 
  79   ((uint64_t) 0xffffffffUL),     /* low  of K_M_64    */
  80   ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64    */
  81   ((uint64_t) 0xba8ccbe8U << 1), /* low  of K_160_96  */
  82   ((uint64_t) 0x6655004fU << 1), /* high of K_160_96  */
  83   ((uint64_t) 0xaa2215eaU << 1), /* low  of K_544_480 */
  84   ((uint64_t) 0xe3720acbU << 1)  /* high of K_544_480 */
  85 };
  86 
  87 /**
  88  *  crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h
  89  */
  90 juint StubRoutines::x86::_crc_table[] =
  91 {
  92     0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
  93     0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
  94     0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
  95     0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
  96     0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
  97     0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
  98     0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
  99     0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
 100     0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
 101     0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
 102     0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
 103     0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
 104     0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
 105     0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
 106     0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
 107     0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
 108     0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
 109     0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
 110     0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
 111     0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
 112     0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
 113     0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
 114     0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
 115     0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
 116     0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
 117     0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
 118     0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
 119     0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
 120     0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
 121     0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
 122     0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
 123     0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
 124     0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
 125     0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 126     0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 127     0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 128     0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 129     0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 130     0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 131     0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 132     0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 133     0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 134     0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 135     0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 136     0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 137     0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 138     0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 139     0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 140     0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 141     0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 142     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 143     0x2d02ef8dUL
 144 };
 145 
 146 #define D 32
 147 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)
 148 
 149 #define TILL_CYCLE 31
 150 uint32_t _crc32c_pow_2k_table[TILL_CYCLE]; // because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0]
 151 
 152 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
 153 // Listing 1: Multiplication of normalized polynomials
 154 // "a" and "b" occupy D least significant bits.
 155 uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
 156   uint32_t product = 0;
 157   uint32_t b_pow_x_table[D + 1]; // b_pow_x_table[k] = (b * x**k) mod P
 158   b_pow_x_table[0] = b;
 159   for (int k = 0; k < D; ++k) {
 160     // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result.
 161     if ((a & (((uint32_t)1) << (D - 1 - k))) != 0) product ^= b_pow_x_table[k];
 162 
 163     // Compute b_pow_x_table[k+1] = (b ** x**(k+1)) mod P.
 164     if (b_pow_x_table[k] & 1) {
 165       // If degree of (b_pow_x_table[k] * x) is D, then
 166       // degree of (b_pow_x_table[k] * x - P) is less than D.
 167       b_pow_x_table[k + 1] = (b_pow_x_table[k] >> 1) ^ P;
 168     }
 169     else {
 170       b_pow_x_table[k + 1] = b_pow_x_table[k] >> 1;
 171     }
 172   }
 173   return product;
 174 }
 175 #undef D
 176 #undef P
 177 
 178 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
 179 void crc32c_init_pow_2k(void) {
 180   // _crc32c_pow_2k_table(0) =
 181   // x^(2^k) mod P(x) = x mod P(x) = x
 182   // Since we are operating on a reflected values
 183   // x = 10b, reflect(x) = 0x40000000
 184   _crc32c_pow_2k_table[0] = 0x40000000;
 185 
 186   for (int k = 1; k < TILL_CYCLE; k++) {
 187     // _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x)
 188     uint32_t tmp = _crc32c_pow_2k_table[k - 1];
 189     _crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp);
 190   }
 191 }
 192 
 193 // x^N mod P(x)
 194 uint32_t crc32c_f_pow_n(uint32_t n) {
 195   //            result = 1 (polynomial)
 196   uint32_t one, result = 0x80000000, i = 0;
 197 
 198   while (one = (n & 1), (n == 1 || n - one > 0)) {
 199     if (one) {
 200       result = crc32c_multiply(result, _crc32c_pow_2k_table[i]);
 201     }
 202     n >>= 1;
 203     i++;
 204   }
 205 
 206   return result;
 207 }
 208 
 209 juint *StubRoutines::x86::_crc32c_table;
 210 
 211 void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
 212 
 213   static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
 214 
 215   crc32c_init_pow_2k();
 216 
 217   pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8);      // 8N * 8 = 64N
 218   pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2);  // 128N
 219 
 220   pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
 221   pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
 222 
 223   pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
 224   pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
 225             crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
 226 
 227   if (is_pclmulqdq_table_supported) {
 228     _crc32c_table = pow_n;
 229   } else {
 230     static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
 231 
 232     for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
 233       static juint X_CONST = pow_n[j];
 234       for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
 235       // S. Gueron / Information Processing Letters 112 (2012) 184
 236       // Algorithm 3: Generating a carry-less multiplication lookup table.
 237       // Input: A 32-bit constant, X_CONST.
 238       // Output: A table of 256 entries, each one is a 64-bit quadword,
 239       // that can be used for computing "byte" * X_CONST, for a given byte.
 240         pclmulqdq_table[j * 256 + i] =
 241           ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
 242           ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
 243           ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
 244       }
 245     }
 246     _crc32c_table = (juint*)pclmulqdq_table;
 247   }
 248 }
 249 
 250 ALIGNED_(64) juint StubRoutines::x86::_k256[] =
 251 {
 252     0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
 253     0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
 254     0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
 255     0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
 256     0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
 257     0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
 258     0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
 259     0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
 260     0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
 261     0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
 262     0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
 263     0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
 264     0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
 265     0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
 266     0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
 267     0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
 268 };