New src/cpu/x86/vm/crc32c.h

   1 /*
   2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
   3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 *
   5 * This code is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 only, as
   7 * published by the Free Software Foundation.
   8 *
   9 * This code is distributed in the hope that it will be useful, but WITHOUT
  10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 * version 2 for more details (a copy is included in the LICENSE file that
  13 * accompanied this code).
  14 *
  15 * You should have received a copy of the GNU General Public License version
  16 * 2 along with this work; if not, write to the Free Software Foundation,
  17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 *
  19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 * or visit www.oracle.com if you need additional information or have any
  21 * questions.
  22 *
  23 */
  24 
  25 enum {  // Chunk-size constants for the CRC32C code (in bytes, per the name NUM_ChunkSizeInBytes).
  26   // S. Gueron / Information Processing Letters 112 (2012) 184
  27   // shows that anything above 6K and below 32K is a good choice;
  28   // 32K does not deliver any further performance gains.
  29   // 6K = 3 * (8 * 256), as we compute 3 blocks together.
  30   //
  31   // Thus selecting the smallest value so it can apply to the largest number
  32   // of buffer sizes.
  33   HIGH = 8 * 256,
  34 
  35   // Empirical value,
  36   // based on a microbenchmark (ubench) study using the methodology described in
  37   // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
  38   //
  39   // An arbitrary multiplier between 27 and 256 (the LOW and HIGH multipliers).
  40   MIDDLE = 8 * 86,
  41 
  42   // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
  43   // shows that 240 and 1024 are as good a choice as 216 == 8 * 27.
  44   //
  45   // Selecting the smallest value which resulted in a significant performance improvement over
  46   // the sequential version.
  47   LOW = 8 * 27,
  48 
  49   NUM_ChunkSizeInBytes = 3  // number of chunk sizes defined above (HIGH, MIDDLE, LOW)
  50 };  // NOTE(review): no include guard precedes these definitions; including this header twice would redefine the enumerators.
  51 // Notes:
  52 // 1. Why do we need to choose a "chunk" approach?
  53 // The overhead of computing the required powers for an arbitrary buffer of size N is significant
  54 // (the performance of such an implementation approaches that of a library implementation).
  55 // 2. Why only 3 "chunks"?
  56 // Performance experiments showed that using only HIGH+LOW did not deliver a stable speedup
  57 // curve.
  58 //
  59 // Disclaimer:
  60 // If you ever decide to increase/decrease the number of "chunks", be sure to modify
  61 // a) constants table generation (src/cpu/x86/vm/stubRoutines_x86.cpp)
  62 // b) constant fetch from that table (macroAssembler_x86.cpp)
  63 // c) unrolled for loop (macroAssembler_x86.cpp)
  64 
  65 // We need to compute powers of 64N and 128N for each "chunk" size, i.e. two constants per chunk size.
  66 enum { NUM_PRECOMPUTED_CONSTANTS = 2 * NUM_ChunkSizeInBytes };