rev 53441 : 8217459: [PPC64] Cleanup non-vector version of CRC32
Reviewed-by:

   1 /*
   2  * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2012, 2019, SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "runtime/stubRoutines.hpp"
  29 #include "runtime/vm_version.hpp"
  30 
  31 // Implementation of the platform-specific part of StubRoutines - for
  32 // a description of how to extend it, see the stubRoutines.hpp file.
  33 
  34 
  35 #define __ masm->
  36 
  37 // CRC constant compute functions
  38 static juint fold_byte(juint w, juint reverse_poly) {
  39   for (int i = 0; i < 8; i++) {
  40     int poly_if_odd = (-(w & 1)) & reverse_poly;
  41     w = (w >> 1) ^ poly_if_odd;
  42   }
  43   return w;
  44 }
  45 
  46 static juint fold_word(juint w, juint reverse_poly) {
  47   for (int i = 0; i < 32; i++) {
  48     int poly_if_odd = (-(w & 1)) & reverse_poly;
  49     w = (w >> 1) ^ poly_if_odd;
  50   }
  51   return w;
  52 }
  53 
  54 static julong numberOfLeadingZeros(julong p) {
  55   julong l = 1ull << 63;
  56   for (int i = 0; i < 64; ++i) {
  57     if (p & l) return i;
  58     l >>= 1;
  59   }
  60   return 64;
  61 }
  62 
  63 static julong compute_inverse_poly(julong long_poly) {
  64   // 2^64 / p
  65   julong mod = 0, div = 0;
  66   int d = numberOfLeadingZeros(long_poly);
  67   int s = d + 1;
  68   do {
  69     mod ^= (long_poly << s);
  70     div |= (1L << s);
  71     s = d - numberOfLeadingZeros(mod);
  72   } while (s >= 0);
  73   return div;
  74 }
  75 
  76 #ifndef VM_LITTLE_ENDIAN
  77 static void reverse_bytes(juint &w) {
  78   w = ((w >> 24) & 0xFF) | (((w >> 16) & 0xFF) << 8) | (((w >> 8) & 0xFF) << 16) | ((w & 0xFF) << 24);
  79 }
  80 #endif
  81 
  82 // Constants to fold n words as needed by macroAssembler.
  83 address StubRoutines::generate_crc_constants(juint reverse_poly) {
  84   // Layout of constant table:
  85   // <= Power7 Little Endian: 4 tables for byte folding
  86   // <= Power7 Big Endian: 1 table for single byte folding + 4 tables for multi-byte folding
  87   // >= Power8: 1 table for single byte folding + constants for fast vector implementation
  88   const bool use_vector = VM_Version::has_vpmsumb();
  89   const int vector_size = 16 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
  90 
  91   const int size = use_vector ? CRC32_TABLE_SIZE + vector_size : (4 BIG_ENDIAN_ONLY(+1)) * CRC32_TABLE_SIZE;
  92   const address consts = (address)malloc(size);
  93   if (consts == NULL) {
  94     vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CRC constants: no enough space");
  95   }
  96   juint* ptr = (juint*)consts;
  97 
  98   // Simple table used for single byte folding
  99   LITTLE_ENDIAN_ONLY(if (use_vector)) {
 100     for (int i = 0; i < 256; ++i) {
 101       ptr[i] = fold_byte(i, reverse_poly);
 102     }
 103   }
 104 
 105   if (!use_vector) {
 106     BIG_ENDIAN_ONLY(ptr = (juint*)(consts + CRC32_TABLE_SIZE);)
 107     // <= Power7: 4 tables
 108     for (int i = 0; i < 256; ++i) {
 109       juint a = fold_byte(i, reverse_poly),
 110             b = fold_byte(a, reverse_poly),
 111             c = fold_byte(b, reverse_poly),
 112             d = fold_byte(c, reverse_poly);
 113 #ifndef VM_LITTLE_ENDIAN
 114       reverse_bytes(a);
 115       reverse_bytes(b);
 116       reverse_bytes(c);
 117       reverse_bytes(d);
 118 #endif
 119       ptr[i         ] = a;
 120       ptr[i +    256] = b;
 121       ptr[i + 2* 256] = c;
 122       ptr[i + 3* 256] = d;
 123     }
 124 #if 0
 125     for (int i = 0; i < 4; ++i) {
 126       tty->print_cr("table %d:", i);
 127       for (int j = 0; j < 32; ++j) {
 128         for (int k = 0; k < 8; ++k) {
 129           tty->print("%08x ", ptr[i*256 + j*8 + k]);
 130         }
 131         tty->cr();
 132       }
 133     }
 134 #endif
 135     return consts;
 136   }
 137 
 138   // >= Power8: vector constants
 139   juint* ptr1 = (juint*)(consts + CRC32_TABLE_SIZE);
 140   guarantee(((intptr_t)ptr1 & 0xF) == 0, "16-byte alignment needed");
 141 
 142   // Generate constants for outer loop
 143   juint v0, v1, v2, v3 = 1;
 144   for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) {
 145     v0 = fold_word(v3, reverse_poly);
 146     v1 = fold_word(v0, reverse_poly);
 147     v2 = fold_word(v1, reverse_poly);
 148     v3 = fold_word(v2, reverse_poly);
 149 #ifdef VM_LITTLE_ENDIAN
 150     ptr1[4*i  ] = v3;
 151     ptr1[4*i+1] = v2;
 152     ptr1[4*i+2] = v3;
 153     ptr1[4*i+3] = v2;
 154 #else
 155     ptr1[4*i  ] = v2;
 156     ptr1[4*i+1] = v3;
 157     ptr1[4*i+2] = v2;
 158     ptr1[4*i+3] = v3;
 159 #endif
 160   }
 161 
 162   // Generate constants for inner loop
 163   juint* ptr2 = ptr1 + 4 * (CRC32_UNROLL_FACTOR2 - 1);
 164   v3 = 1; // Restart from scratch.
 165   for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) {
 166     v0 = fold_word(v3, reverse_poly);
 167     v1 = fold_word(v0, reverse_poly);
 168     v2 = fold_word(v1, reverse_poly);
 169     v3 = fold_word(v2, reverse_poly);
 170     if (i % CRC32_UNROLL_FACTOR2 == 0) {
 171       int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2;
 172       for (int j = 0; j < 4; ++j) {
 173 #ifdef VM_LITTLE_ENDIAN
 174         ptr2[4*idx  ] = v3;
 175         ptr2[4*idx+1] = v2;
 176         ptr2[4*idx+2] = v1;
 177         ptr2[4*idx+3] = v0;
 178 #else
 179         ptr2[4*idx  ] = v0;
 180         ptr2[4*idx+1] = v1;
 181         ptr2[4*idx+2] = v2;
 182         ptr2[4*idx+3] = v3;
 183 #endif
 184       }
 185     }
 186   }
 187 
 188   // Constants to reduce 64 to 32 bit as needed by macroAssembler.
 189   juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
 190   julong* c = (julong*)ptr3;
 191   julong long_poly = (((julong)reverse_poly) << 1) | 1;
 192   julong inverse_long_poly = compute_inverse_poly(long_poly);
 193 #ifdef VM_LITTLE_ENDIAN
 194   c[0] = inverse_long_poly;
 195   c[1] = long_poly;
 196 #else
 197   c[0] = long_poly;
 198   c[1] = inverse_long_poly;
 199 #endif
 200 
 201 #ifdef ASSERT
 202   if (reverse_poly == REVERSE_CRC32_POLY) {
 203     assert(INVERSE_REVERSE_CRC32_POLY == inverse_long_poly, "sanity");
 204   } else if (reverse_poly == REVERSE_CRC32C_POLY) {
 205     assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity");
 206   }
 207 #endif
 208 
 209   //printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly);
 210 
 211   return consts;
 212 }



















































































































































































































































































































































































































































































































































































































--- EOF ---