1 /*
   2  * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "runtime/deoptimization.hpp"
  27 #include "runtime/frame.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/thread.inline.hpp"
  30 #include "crc32c.h"
  31 
  32 // Implementation of the platform-specific part of StubRoutines - for
  33 // a description of how to extend it, see the stubRoutines.hpp file.
  34 
  35 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
  36 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
  37 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
  38 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
  39 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
  40 
  41 uint64_t StubRoutines::x86::_crc_by128_masks[] =
  42 {
  43   /* The fields in this structure are arranged so that they can be
  44    * picked up two at a time with 128-bit loads.
  45    *
  46    * Because of flipped bit order for this CRC polynomials
  47    * the constant for X**N is left-shifted by 1.  This is because
  48    * a 64 x 64 polynomial multiply produces a 127-bit result
  49    * but the highest term is always aligned to bit 0 in the container.
  50    * Pre-shifting by one fixes this, at the cost of potentially making
  51    * the 32-bit constant no longer fit in a 32-bit container (thus the
  52    * use of uint64_t, though this is also the size used by the carry-
  53    * less multiply instruction.
  54    *
  55    * In addition, the flipped bit order and highest-term-at-least-bit
  56    * multiply changes the constants used.  The 96-bit result will be
  57    * aligned to the high-term end of the target 128-bit container,
  58    * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
  59    * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
  60    *
  61    * This cause additional problems in the 128-to-64-bit reduction; see the
  62    * code for details.  By storing a mask in the otherwise unused half of
  63    * a 128-bit constant, bits can be cleared before multiplication without
  64    * storing and reloading.  Note that staying on a 128-bit datapath means
  65    * that some data is uselessly stored and some unused data is intersected
  66    * with an irrelevant constant.
  67    */
  68 
  69   ((uint64_t) 0xffffffffUL),     /* low  of K_M_64    */
  70   ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64    */
  71   ((uint64_t) 0xba8ccbe8U << 1), /* low  of K_160_96  */
  72   ((uint64_t) 0x6655004fU << 1), /* high of K_160_96  */
  73   ((uint64_t) 0xaa2215eaU << 1), /* low  of K_544_480 */
  74   ((uint64_t) 0xe3720acbU << 1)  /* high of K_544_480 */
  75 };
  76 
  77 /**
  78  *  crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h
  79  */
  80 juint StubRoutines::x86::_crc_table[] =
  81 {
  82     0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
  83     0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
  84     0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
  85     0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
  86     0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
  87     0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
  88     0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
  89     0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
  90     0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
  91     0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
  92     0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
  93     0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
  94     0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
  95     0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
  96     0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
  97     0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
  98     0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
  99     0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
 100     0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
 101     0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
 102     0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
 103     0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
 104     0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
 105     0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
 106     0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
 107     0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
 108     0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
 109     0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
 110     0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
 111     0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
 112     0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
 113     0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
 114     0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
 115     0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 116     0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 117     0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 118     0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 119     0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 120     0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 121     0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 122     0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 123     0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 124     0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 125     0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 126     0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 127     0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 128     0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 129     0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 130     0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 131     0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 132     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 133     0x2d02ef8dUL
 134 };
 135 
 136 #define D 32
 137 #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)
 138 
 139 #define TILL_CYCLE 31
 140 uint32_t _crc32c_pow_2k_table[TILL_CYCLE]; // because _crc32c_pow_2k_table[TILL_CYCLE == 31] == _crc32c_pow_2k_table[0]
 141 
 142 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
 143 // Listing 1: Multiplication of normalized polynomials
 144 // "a" and "b" occupy D least significant bits.
 145 uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
 146   uint32_t product = 0;
 147   uint32_t b_pow_x_table[D + 1]; // b_pow_x_table[k] = (b * x**k) mod P
 148   b_pow_x_table[0] = b;
 149   for (int k = 0; k < D; ++k) {
 150     // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result.
 151     if ((a & (((uint32_t)1) << (D - 1 - k))) != 0) product ^= b_pow_x_table[k];
 152 
 153     // Compute b_pow_x_table[k+1] = (b ** x**(k+1)) mod P.
 154     if (b_pow_x_table[k] & 1) {
 155       // If degree of (b_pow_x_table[k] * x) is D, then
 156       // degree of (b_pow_x_table[k] * x - P) is less than D.
 157       b_pow_x_table[k + 1] = (b_pow_x_table[k] >> 1) ^ P;
 158     }
 159     else {
 160       b_pow_x_table[k + 1] = b_pow_x_table[k] >> 1;
 161     }
 162   }
 163   return product;
 164 }
 165 #undef D
 166 #undef P
 167 
 168 // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
 169 void crc32c_init_pow_2k(void) {
 170   // _crc32c_pow_2k_table(0) =
 171   // x^(2^k) mod P(x) = x mod P(x) = x
 172   // Since we are operating on a reflected values
 173   // x = 10b, reflect(x) = 0x40000000
 174   _crc32c_pow_2k_table[0] = 0x40000000;
 175 
 176   for (int k = 1; k < TILL_CYCLE; k++) {
 177     // _crc32c_pow_2k_table(k+1) = _crc32c_pow_2k_table(k-1)^2 mod P(x)
 178     uint32_t tmp = _crc32c_pow_2k_table[k - 1];
 179     _crc32c_pow_2k_table[k] = crc32c_multiply(tmp, tmp);
 180   }
 181 }
 182 
 183 // x^N mod P(x)
 184 uint32_t crc32c_f_pow_n(uint32_t n) {
 185   //            result = 1 (polynomial)
 186   uint32_t one, result = 0x80000000, i = 0;
 187 
 188   while (one = (n & 1), (n == 1 || n - one > 0)) {
 189     if (one) {
 190       result = crc32c_multiply(result, _crc32c_pow_2k_table[i]);
 191     }
 192     n >>= 1;
 193     i++;
 194   }
 195 
 196   return result;
 197 }
 198 
 199 juint *StubRoutines::x86::_crc32c_table;
 200 
 201 void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
 202 
 203   static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
 204 
 205   crc32c_init_pow_2k();
 206 
 207   pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8);      // 8N * 8 = 64N
 208   pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2);  // 128N
 209 
 210   pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
 211   pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
 212 
 213   pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
 214   pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
 215             crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
 216 
 217   if (is_pclmulqdq_table_supported) {
 218     _crc32c_table = pow_n;
 219   } else {
 220     static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
 221 
 222     for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
 223       static juint X_CONST = pow_n[j];
 224       for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
 225       // S. Gueron / Information Processing Letters 112 (2012) 184
 226       // Algorithm 3: Generating a carry-less multiplication lookup table.
 227       // Input: A 32-bit constant, X_CONST.
 228       // Output: A table of 256 entries, each one is a 64-bit quadword,
 229       // that can be used for computing "byte" * X_CONST, for a given byte.
 230         pclmulqdq_table[j * 256 + i] =
 231           ((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
 232           ((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
 233           ((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
 234       }
 235     }
 236     _crc32c_table = (juint*)pclmulqdq_table;
 237   }
 238 }