--- old/src/cpu/x86/vm/assembler_x86.cpp 2015-08-26 13:22:13.057053800 -0700 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2015-08-26 13:22:12.799028000 -0700 @@ -1605,6 +1605,85 @@ emit_int8((unsigned char)0xA2); } +// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented +// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v +// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. - +// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. - +// +// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v +// +// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v +// +// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v +void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) { + assert(VM_Version::supports_sse4_2(), ""); + int8_t w = 0x01; + Prefix p = Prefix_EMPTY; + + emit_int8((int8_t)0xF2); + switch (sizeInBytes) { + case 1: + w = 0; + break; + case 2: + case 4: + break; + LP64_ONLY(case 8:) + // This instruction is not valid in 32 bits + // Note: + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + // + // Page B - 72 Vol. 2C says + // qwreg2 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2 + // mem64 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m + // F0!!! + // while 3 - 208 Vol. 2A + // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E.Accumulate CRC32 on r / m64. + // + // the 0 on a last bit is reserved for a different flavor of this instruction : + // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E.Accumulate CRC32 on r / m8. 
+ p = REX_W; + break; + default: + assert(0, "Unsupported value for a sizeInBytes argument"); + break; + } + LP64_ONLY(prefix(crc, v, p);) + emit_int8((int8_t)0x0F); + emit_int8(0x38); + emit_int8((int8_t)(0xF0 | w)); + emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7)); +} + +void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) { + assert(VM_Version::supports_sse4_2(), ""); + InstructionMark im(this); + int8_t w = 0x01; + Prefix p = Prefix_EMPTY; + + emit_int8((int8_t)0xF2); + switch (sizeInBytes) { + case 1: + w = 0; + break; + case 2: + case 4: + break; + LP64_ONLY(case 8:) + // This instruction is not valid in 32 bits + p = REX_W; + break; + default: + assert(0, "Unsupported value for a sizeInBytes argument"); + break; + } + LP64_ONLY(prefix(crc, adr, p);) + emit_int8((int8_t)0x0F); + emit_int8(0x38); + emit_int8((int8_t)(0xF0 | w)); + emit_operand(crc, adr); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3); @@ -6004,6 +6083,14 @@ emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); } +// 0F A4 / r ib +void Assembler::shldl(Register dst, Register src, int8_t imm8) { + emit_int8(0x0F); + emit_int8((unsigned char)0xA4); + emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); + emit_int8(imm8); +} + void Assembler::shrdl(Register dst, Register src) { emit_int8(0x0F); emit_int8((unsigned char)0xAD); @@ -6189,6 +6276,40 @@ } } +void Assembler::prefix(Register dst, Register src, Prefix p) { + if (src->encoding() >= 8) { + p = (Prefix)(p | REX_B); + } + if (dst->encoding() >= 8) { + p = (Prefix)( p | REX_R); + } + if (p != Prefix_EMPTY) { + // do not generate an empty prefix + prefix(p); + } +} + +void Assembler::prefix(Register dst, Address adr, Prefix p) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + assert(false, "prefix(Register dst, Address adr, 
Prefix p) does not support handling of an X"); + } else { + prefix(REX_B); + } + } else { + if (adr.index_needs_rex()) { + assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X"); + } + } + if (dst->encoding() >= 8) { + p = (Prefix)(p | REX_R); + } + if (p != Prefix_EMPTY) { + // do not generate an empty prefix + prefix(p); + } +} + void Assembler::prefix(Address adr) { if (adr.base_needs_rex()) { if (adr.index_needs_rex()) { --- old/src/cpu/x86/vm/assembler_x86.hpp 2015-08-26 13:22:14.946242700 -0700 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2015-08-26 13:22:14.690217100 -0700 @@ -504,7 +504,8 @@ VEX_3bytes = 0xC4, VEX_2bytes = 0xC5, - EVEX_4bytes = 0x62 + EVEX_4bytes = 0x62, + Prefix_EMPTY = 0x0 }; enum VexPrefix { @@ -608,6 +609,8 @@ int prefixq_and_encode(int dst_enc, int src_enc); void prefix(Register reg); + void prefix(Register dst, Register src, Prefix p); + void prefix(Register dst, Address adr, Prefix p); void prefix(Address adr); void prefixq(Address adr); @@ -1165,6 +1168,10 @@ // Identify processor type and features void cpuid(); + // CRC32C + void crc32(Register crc, Register v, int8_t sizeInBytes); + void crc32(Register crc, Address adr, int8_t sizeInBytes); + // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void cvtsd2ss(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, Address src); @@ -1764,6 +1771,7 @@ void setb(Condition cc, Register dst); void shldl(Register dst, Register src); + void shldl(Register dst, Register src, int8_t imm8); void shll(Register dst, int imm8); void shll(Register dst); --- old/src/cpu/x86/vm/assembler_x86.inline.hpp 2015-08-26 13:22:16.618409900 -0700 +++ new/src/cpu/x86/vm/assembler_x86.inline.hpp 2015-08-26 13:22:16.361384200 -0700 @@ -37,6 +37,8 @@ inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; } inline void Assembler::prefix(Register reg) {} +inline void 
Assembler::prefix(Register dst, Register src, Prefix p) {} +inline void Assembler::prefix(Register dst, Address adr, Prefix p) {} inline void Assembler::prefix(Address adr) {} inline void Assembler::prefixq(Address adr) {} --- old/src/cpu/x86/vm/interpreterGenerator_x86.hpp 2015-08-26 13:22:18.228570900 -0700 +++ new/src/cpu/x86/vm/interpreterGenerator_x86.hpp 2015-08-26 13:22:17.969545000 -0700 @@ -42,6 +42,7 @@ address generate_Reference_get_entry(); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind); void lock_method(void); void generate_stack_overflow_check(void); --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-08-26 13:22:19.832731300 -0700 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-08-26 13:22:19.573705400 -0700 @@ -8467,8 +8467,504 @@ notl(crc); // ~c } -#undef BIND -#undef BLOCK_COMMENT +namespace CRC32C { +#include "crc32c.h" + +#define Nehalem(x) x +#define Westmere(x) x + +#undef IN +#define IN(x) x +#define INOUT(x) x +#undef OUT +#define OUT(x) x +#define Scratch(x) x + +#undef D + +#ifdef _LP64 +// S. Gueron / Information Processing Letters 112 (2012) 184 +// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table. +// Input: A 32 bit value B = [byte3, byte2, byte1, byte0]. 
+// Output: the 64-bit carry-less product of B * CONST + void IPL_Alg4(INOUT(Register B), uint32_t n, + Scratch(Register C), Scratch(Register D), Scratch(Register Z), + MacroAssembler * This) { + This->lea(Z, ExternalAddress(StubRoutines::crc32c_table_addr())); + if (n > 0) { + This->addq(Z, n * 256 * 8); + } + // Q1 = TABLEExt[n][B & 0xFF]; + This->movl(C, B); + This->andl(C, 0x000000FF); + This->shll(C, 3); + This->addq(C, Z); + This->movq(C, Address(C, 0)); + + // Q2 = TABLEExt[n][B >> 8 & 0xFF]; + This->movl(D, B); + This->shrl(D, 8); + This->andl(D, 0x000000FF); + This->shll(D, 3); + This->addq(D, Z); + This->movq(D, Address(D, 0)); + + This->shlq(D, 8); + This->xorq(C, D); + + // Q3 = TABLEExt[n][B >> 16 & 0xFF]; + This->movl(D, B); + This->shrl(D, 16); + This->andl(D, 0x000000FF); + This->shll(D, 3); + This->addq(D, Z); + This->movq(D, Address(D, 0)); + + This->shlq(D, 16); + This->xorq(C, D); + + // Q4 = TABLEExt[n][B >> 24 & 0xFF]; + This->shrl(B, 24); + This->andl(B, 0x000000FF); + This->shll(B, 3); + This->addq(B, Z); + This->movq(B, Address(B, 0)); + + This->shlq(B, 24); + This->xorq(B, C); + // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; + } + + void PCLMULQDQ(Westmere(Scratch(XMMRegister crcXMM)), + INOUT(Register crc), + uint32_t CONSTOrPreCompConstIndex, bool IsPclmulqdqSupported, + Westmere(Scratch(XMMRegister DXMM)), + Scratch(Register A), + Nehalem(Scratch(Register B)), Nehalem(Scratch(Register C)), + MacroAssembler * This) { + if (IsPclmulqdqSupported) { + This->movdl(crcXMM, crc); // modified blindly + + This->movl(A, CONSTOrPreCompConstIndex); + This->movdl(DXMM, A); + This->pclmulqdq(crcXMM, DXMM, 0); + + This->movdq(crc, crcXMM); + } else { + IPL_Alg4(crc, CONSTOrPreCompConstIndex, A, B, C, This); + } + } + + // Recombination Alternative 2: No bit-reflections + // T1 = (CRC_A * U1) << 1 + // T2 = (CRC_B * U2) << 1 + // C1 = T1 >> 32 + // C2 = T2 >> 32 + // T1 = T1 & 0xFFFFFFFF + // T2 = T2 & 0xFFFFFFFF + // T1 = CRC32(0, T1) + // T2 = 
CRC32(0, T2) + // C1 = C1 ^ T1 + // C2 = C2 ^ T2 + // CRC = C1 ^ C2 ^ CRC_C + void RecAlt2(uint32_t CONSTOrPreCompConstIndexU1, uint32_t CONSTOrPreCompConstIndexU2, bool IsPclmulqdqSupported, INOUT(Register crcA), IN(Scratch(Register crcB)), IN(Register crcC), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + Scratch(Register E), Scratch(Register F), + Nehalem(Scratch(Register G)), + MacroAssembler * This) { + PCLMULQDQ(AXMM, crcA, CONSTOrPreCompConstIndexU1, IsPclmulqdqSupported, CXMM, E, F, G, This); + PCLMULQDQ(BXMM, crcB, CONSTOrPreCompConstIndexU2, IsPclmulqdqSupported, CXMM, E, F, G, This); + This->shlq(crcA, 1); + This->movl(E, crcA); + This->shrq(crcA, 32); + This->xorl(F, F); + This->crc32(F, E, 4); + This->xorl(crcA, F); // we don't care about upper 32 bit contents here + This->shlq(crcB, 1); + This->movl(E, crcB); + This->shrq(crcB, 32); + This->xorl(F, F); + This->crc32(F, E, 4); + This->xorl(crcB, F); + This->xorl(crcA, crcB); + This->xorl(crcA, crcC); + } + + // Set N to predefined value + // Subtract from a lenght of a buffer + // execute in a loop: + // CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0 + // for i = 1 to N do + // CRC_A = CRC32(CRC_A, A[i]) + // CRC_B = CRC32(CRC_B, B[i]) + // CRC_C = CRC32(CRC_C, C[i]) + // end for + // Recombine + void ProcChunk(uint32_t size, uint32_t CONSTOrPreCompConstIndexU1, uint32_t CONSTOrPreCompConstIndexU2, bool IsPclmulqdqSupported, + INOUT(Register len), INOUT(Register buf), INOUT(Register crc), + Scratch(Register E), Scratch(Register F), Scratch(Register end), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + Scratch(Register G), Scratch(Register H), + Nehalem(Scratch(Register I)), + MacroAssembler * This) { + Label L_processPartitions; + Label L_processPartition; + Label L_exit; + + This->bind(L_processPartitions); + This->cmpl(len, 3 * size); + This->jcc(Assembler::less, L_exit); + 
This->xorl(E, E); + This->xorl(F, F); + This->movq(end, buf); + This->addq(end, size); + + This->bind(L_processPartition); + This->crc32(crc, Address(buf, 0), 8); + This->crc32(E, Address(buf, size), 8); + This->crc32(F, Address(buf, size * 2), 8); + This->addq(buf, 8); + This->cmpq(buf, end); + This->jcc(Assembler::less, L_processPartition); + RecAlt2(CONSTOrPreCompConstIndexU1, CONSTOrPreCompConstIndexU2, IsPclmulqdqSupported, crc, E, F, + AXMM, BXMM, CXMM, + G, H, + I, + This); + This->addq(buf, 2 * size); + This->subl(len, 3 * size); + This->jmp(L_processPartitions); + + This->bind(L_exit); + } +#else +void IPL_Alg4(INOUT(Register B), uint32_t n, + Scratch(Register C), Scratch(Register D), Scratch(Register Z), + Scratch(XMMRegister CXMM), Scratch(XMMRegister DXMM), + MacroAssembler * This) { + This->lea(Z, ExternalAddress(StubRoutines::crc32c_table_addr())); + if (n > 0) { + This->addl(Z, n * 256 * 8); + } + // Q1 = TABLEExt[n][B & 0xFF]; + This->movl(C, B); + This->andl(C, 0x000000FF); + This->shll(C, 3); + This->addl(C, Z); + This->movq(CXMM, Address(C, 0)); + + // Q2 = TABLEExt[n][B >> 8 & 0xFF]; + This->movl(D, B); + This->shrl(D, 8); + This->andl(D, 0x000000FF); + This->shll(D, 3); + This->addl(D, Z); + This->movq(DXMM, Address(D, 0)); + + This->psllq(DXMM, 8); + This->pxor(CXMM, DXMM); + + // Q3 = TABLEExt[n][B >> 16 & 0xFF]; + This->movl(D, B); + This->shrl(D, 16); + This->andl(D, 0x000000FF); + This->shll(D, 3); + This->addl(D, Z); + This->movq(DXMM, Address(D, 0)); + + This->psllq(DXMM, 16); + This->pxor(CXMM, DXMM); + + // Q4 = TABLEExt[n][B >> 24 & 0xFF]; + This->shrl(B, 24); + This->andl(B, 0x000000FF); + This->shll(B, 3); + This->addl(B, Z); + This->movq(DXMM, Address(B, 0)); + + This->psllq(DXMM, 24); + This->pxor(CXMM, DXMM); // Result in CXMM + // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; +} + +void PCLMULQDQ(Westmere(Scratch(XMMRegister crcXMM)), + INOUT(Register crc), + uint32_t CONSTOrPreCompConstIndex, bool IsPclmulqdqSupported, + 
Westmere(Scratch(XMMRegister DXMM)), + Scratch(Register A), + Nehalem(Scratch(Register B)), Nehalem(Scratch(Register C)), + MacroAssembler * This) { + if (IsPclmulqdqSupported) { + This->movdl(crcXMM, crc); + + This->movl(A, CONSTOrPreCompConstIndex); + This->movdl(DXMM, A); + This->pclmulqdq(crcXMM, DXMM, 0); + // Keep result in XMM since GPR is 32 bit in length + } else { + IPL_Alg4(crc, CONSTOrPreCompConstIndex, A, B, C, crcXMM, DXMM, This); + } +} + +void RecAlt2(uint32_t CONSTOrPreCompConstIndexU1, uint32_t CONSTOrPreCompConstIndexU2, bool IsPclmulqdqSupported, INOUT(Register crcA), IN(Scratch(Register crcB)), IN(Register crcC), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + Scratch(Register E), Scratch(Register F), + Nehalem(Scratch(Register G)), + MacroAssembler * This) { + PCLMULQDQ(AXMM, crcA, CONSTOrPreCompConstIndexU1, IsPclmulqdqSupported, CXMM, E, F, G, This); + PCLMULQDQ(BXMM, crcB, CONSTOrPreCompConstIndexU2, IsPclmulqdqSupported, CXMM, E, F, G, This); + + This->psllq(AXMM, 1); + This->movdl(E, AXMM); + This->psrlq(AXMM, 32); + This->movdl(crcA, AXMM); + + This->xorl(F, F); + This->crc32(F, E, 4); + This->xorl(crcA, F); + + This->psllq(BXMM, 1); + This->movdl(E, BXMM); + This->psrlq(BXMM, 32); + This->movdl(crcB, BXMM); + + This->xorl(F, F); + This->crc32(F, E, 4); + This->xorl(crcB, F); + This->xorl(crcA, crcB); + This->xorl(crcA, crcC); +} + +void ProcChunk(uint32_t size, uint32_t CONSTOrPreCompConstIndexU1, uint32_t CONSTOrPreCompConstIndexU2, bool IsPclmulqdqSupported, + INOUT(Register len), INOUT(Register buf), INOUT(Register crc), + Scratch(Register E), Scratch(Register F), Scratch(Register end), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + Scratch(Register G), Scratch(Register H), + Nehalem(Scratch(Register I)), + MacroAssembler * This) { + Label L_processPartitions; + Label L_processPartition; + Label 
L_exit; + + This->bind(L_processPartitions); + This->cmpl(len, 3 * size); + This->jcc(Assembler::less, L_exit); + This->xorl(E, E); + This->xorl(F, F); + This->movl(end, buf); + This->addl(end, size); + + This->bind(L_processPartition); + This->crc32(crc, Address(buf, 0), 4); + This->crc32(E, Address(buf, size), 4); + This->crc32(F, Address(buf, size*2), 4); + This->crc32(crc, Address(buf, 0+4), 4); + This->crc32(E, Address(buf, size+4), 4); + This->crc32(F, Address(buf, size*2+4), 4); + This->addl(buf, 8); + This->cmpl(buf, end); + This->jcc(Assembler::less, L_processPartition); + + This->push(end); + This->push(len); + This->push(buf); + G = end; + H = len; + I = buf; + + RecAlt2(CONSTOrPreCompConstIndexU1, CONSTOrPreCompConstIndexU2, IsPclmulqdqSupported, crc, E, F, + AXMM, BXMM, CXMM, + G, H, + I, + This); + + This->pop(buf); + This->pop(len); + This->pop(end); + + This->addl(buf, 2 * size); + This->subl(len, 3 * size); + This->jmp(L_processPartitions); + + This->bind(L_exit); +} +#endif //LP64 +} +#undef D + +#ifdef _LP64 +// Algorithm 2: Pipelined usage of the CRC32 instruction. +// Input: A buffer I of L bytes. +// Output: the CRC32C value of the buffer. +// Notations: +// Write L = 24N + r, with N = floor (L/24). +// r = L mod 24 (0 <= r < 24). +// Consider I as the concatenation of A|B|C|R, where A, B, C, each, +// N quadwords, and R consists of r bytes. 
+// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1 +// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1 +// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1 +// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1 +void MacroAssembler::crc32c_IPL_Alg2Alt2Fast(Register crc, Register buf, Register len, + Scratch(Register A), Scratch(Register B), Scratch(Register C), + Scratch(Register D), Scratch(Register E), Scratch(Register F), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + bool IsPclmulqdqSupported) { + uint32_t CONSTOrPreCompConstIndex[CRC32C::NUM_PRECOMPUTED_CONSTANTS]; + Label L_wordByWord; + Label L_byteByByteProlog; + Label L_byteByByte; + Label L_exit; + + if (IsPclmulqdqSupported ) { + CONSTOrPreCompConstIndex[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; + CONSTOrPreCompConstIndex[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1); + + CONSTOrPreCompConstIndex[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); + CONSTOrPreCompConstIndex[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); + + CONSTOrPreCompConstIndex[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); + CONSTOrPreCompConstIndex[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); + assert((CRC32C::NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\""); + } else { + CONSTOrPreCompConstIndex[0] = 1; + CONSTOrPreCompConstIndex[1] = 0; + + CONSTOrPreCompConstIndex[2] = 3; + CONSTOrPreCompConstIndex[3] = 2; + + CONSTOrPreCompConstIndex[4] = 5; + CONSTOrPreCompConstIndex[5] = 4; + } + CRC32C::ProcChunk(CRC32C::HIGH, CONSTOrPreCompConstIndex[0], CONSTOrPreCompConstIndex[1], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + CRC32C::ProcChunk(CRC32C::MIDDLE, CONSTOrPreCompConstIndex[2], CONSTOrPreCompConstIndex[3], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + 
CRC32C::ProcChunk(CRC32C::LOW, CONSTOrPreCompConstIndex[4], CONSTOrPreCompConstIndex[5], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + movl(A, len); + andl(A, 0x00000007); + negl(A); + addl(A, len); + addq(A, buf); + + BIND(L_wordByWord); + cmpq(buf, A); + jcc(Assembler::greaterEqual, L_byteByByteProlog); + crc32(crc, Address(buf, 0), 4); + addq(buf, 4); + jmp(L_wordByWord); + + BIND(L_byteByByteProlog); + andl(len, 0x00000007); + movl(B, 1); + + BIND(L_byteByByte); + cmpl(B, len); + jccb(Assembler::greater, L_exit); + crc32(crc, Address(buf, 0), 1); + incq(buf); + incl(B); + jmp(L_byteByByte); + + BIND(L_exit); +} +#else +void MacroAssembler::crc32c_IPL_Alg2Alt2Fast(Register crc, Register buf, Register len, + Scratch(Register A), Scratch(Register B), Scratch(Register C), + Scratch(Register D), Scratch(Register E), Scratch(Register F), + Westmere(Scratch(XMMRegister AXMM)), Westmere(Scratch(XMMRegister BXMM)), Westmere(Scratch(XMMRegister CXMM)), + bool IsPclmulqdqSupported) { + uint32_t CONSTOrPreCompConstIndex[CRC32C::NUM_PRECOMPUTED_CONSTANTS]; + Label L_wordByWord; + Label L_byteByByteProlog; + Label L_byteByByte; + Label L_exit; + + if (IsPclmulqdqSupported) { + CONSTOrPreCompConstIndex[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; + CONSTOrPreCompConstIndex[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1); + + CONSTOrPreCompConstIndex[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); + CONSTOrPreCompConstIndex[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); + + CONSTOrPreCompConstIndex[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); + CONSTOrPreCompConstIndex[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); + } else { + CONSTOrPreCompConstIndex[0] = 1; + CONSTOrPreCompConstIndex[1] = 0; + + CONSTOrPreCompConstIndex[2] = 3; + CONSTOrPreCompConstIndex[3] = 2; + + CONSTOrPreCompConstIndex[4] = 5; + CONSTOrPreCompConstIndex[5] = 4; + } + 
CRC32C::ProcChunk(CRC32C::HIGH, CONSTOrPreCompConstIndex[0], CONSTOrPreCompConstIndex[1], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + CRC32C::ProcChunk(CRC32C::MIDDLE, CONSTOrPreCompConstIndex[2], CONSTOrPreCompConstIndex[3], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + CRC32C::ProcChunk(CRC32C::LOW, CONSTOrPreCompConstIndex[4], CONSTOrPreCompConstIndex[5], IsPclmulqdqSupported, + len, buf, crc, + A, B, C, + AXMM, BXMM, CXMM, + D, E, + F, + this); + movl(A, len); + andl(A, 0x00000007); + negl(A); + addl(A, len); + addl(A, buf); + + BIND(L_wordByWord); + cmpl(buf, A); + jcc(Assembler::greaterEqual, L_byteByByteProlog); + crc32(crc, Address(buf,0), 4); + addl(buf, 4); + jmp(L_wordByWord); + + BIND(L_byteByByteProlog); + andl(len, 0x00000007); + movl(B, 1); + + BIND(L_byteByByte); + cmpl(B, len); + jccb(Assembler::greater, L_exit); + movb(A, Address(buf, 0)); + crc32(crc, A, 1); + incl(buf); + incl(B); + jmp(L_byteByByte); + + BIND(L_exit); +} +#endif // LP64 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-08-26 13:22:21.751923200 -0700 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-08-26 13:22:21.500898100 -0700 @@ -1258,9 +1258,15 @@ Register raxReg); #endif - // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. 
void update_byte_crc32(Register crc, Register val, Register table); void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); + // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic + void crc32c_IPL_Alg2Alt2Fast(Register crc, Register buf, Register len, + Register A, Register B, Register C, + Register D, Register E, Register F, + XMMRegister AXMM, XMMRegister BXMM, XMMRegister CXMM, + bool IsPclmulqdqSupported); // Fold 128-bit data chunk void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); --- old/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-08-26 13:22:23.385086500 -0700 +++ new/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-08-26 13:22:23.134061400 -0700 @@ -2941,6 +2941,63 @@ return start; } + /** + * Arguments: + * + * Inputs: + * rsp(4) - int crc + * rsp(8) - byte* buf + * rsp(12) - int length + * rsp(16) - table_start - optional (present only when doing a library_call, + * not used by x86 algorithm) + * + * Output: + * rax - int crc result + */ + address generate_updateBytesCRC32C(bool IsPclmulqdqSupported) { + assert(UseCRC32CIntrinsics, "need SSE4_2"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + const Register crc = rax; // crc + const Register buf = rcx; // source java byte array address + const Register len = rdx; // length + const Register D = rbx; + const Register G = rsi; + const Register H = rdi; + const Register empty = 0; // will never be used, in order not + // to change a signature for crc32c_IPL_Alg2Alt2Fast + // between 64/32 I'm just keeping it here + assert_different_registers(crc, buf, len, D, G, H); + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 + + // we need to add additional 4
because __ enter + // have just pushed ebp on a stack + Address buf_arg(rsp, 4 + 4 + 4); + Address len_arg(rsp, 4 + 4 + 8); + // Load up: + __ movl(crc, crc_arg); + __ movl(buf, buf_arg); + __ movl(len, len_arg); + __ push(D); + __ push(G); + __ push(H); + __ crc32c_IPL_Alg2Alt2Fast(crc, buf, len, + D, G, H, + empty, empty, empty, + xmm0, xmm1, xmm2, + IsPclmulqdqSupported); + __ pop(H); + __ pop(G); + __ pop(D); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + // Safefetch stubs. void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) { @@ -3154,6 +3211,13 @@ StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); } + + if (UseCRC32CIntrinsics) { + bool supports_clmul; + StubRoutines::x86::GenerateCRC32CTable(supports_clmul = VM_Version::supports_clmul()); + StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); + } } --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-08-26 13:22:25.091257100 -0700 +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-08-26 13:22:24.815229500 -0700 @@ -3895,6 +3895,64 @@ return start; } + /** + * Arguments: + * + * Inputs: + * c_rarg0 - int crc + * c_rarg1 - byte* buf + * c_rarg2 - long length + * c_rarg3 - table_start - optional (present only when doing a library_call, + * not used by x86 algorithm) + * + * Output: + * rax - int crc result + */ + address generate_updateBytesCRC32C(bool IsPclmulqdqSupported) { + assert(UseCRC32CIntrinsics, "need SSE4_2"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + //reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs + //Windows RCX RDX R8 R9 none none XMM0..XMM3 + //Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7 + const
Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register A = rax; + const Register J = r9; + const Register K = r10; + const Register L = r11; +#ifdef _WIN64 + const Register Y = rdi; + const Register Z = rsi; +#else + const Register Y = rcx; + const Register Z = r8; +#endif + assert_different_registers(crc, buf, len, A, J, K, L, Y, Z); + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame +#ifdef _WIN64 + __ push(Y); + __ push(Z); +#endif + __ crc32c_IPL_Alg2Alt2Fast(crc, buf, len, + A, J, K, + L, Y, Z, + c_farg0, c_farg1, c_farg2, + IsPclmulqdqSupported); + __ movl(rax, crc); +#ifdef _WIN64 + __ pop(Z); + __ pop(Y); +#endif + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } /** * Arguments: @@ -4239,6 +4297,13 @@ StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); } + + if (UseCRC32CIntrinsics) { + bool supports_clmul; + StubRoutines::x86::GenerateCRC32CTable(supports_clmul = VM_Version::supports_clmul()); + StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); + } } void generate_all() { --- old/src/cpu/x86/vm/stubRoutines_x86.cpp 2015-08-26 13:22:26.820430000 -0700 +++ new/src/cpu/x86/vm/stubRoutines_x86.cpp 2015-08-26 13:22:26.564404400 -0700 @@ -130,3 +130,114 @@ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 0x2d02ef8dUL }; + +namespace CRC32C { + #include "crc32c.h" + + #undef CONST + static juint x; + #define CONST x + + #define D 32 + #define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41) + + #define TILL_CYCLE 31 + uint32_t Pow2k[TILL_CYCLE]; // because Pow2k[TILL_CYCLE == 31] == Pow2k[0] + + // A. Kadatch and B. 
Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8 + // Listing 1: Multiplication of normalized polynomials + // "a" and "b" occupy D least significant bits. + uint32_t Multiply(uint32_t a, uint32_t b) { + uint32_t product = 0; + uint32_t bPowX[D + 1]; // bPowX[k] = (b * x**k) mod P + bPowX[0] = b; + for (int k = 0; k < D; ++k) { + // If "a" has non-zero coefficient at x**k,/ add ((b * x**k) mod P) to the result. + if ((a & (uint64_t)(1 << (D - 1 - k))) != 0) product ^= bPowX[k]; + + // Compute bPowX[k+1] = (b ** x**(k+1)) mod P. + if (bPowX[k] & 1) { + // If degree of (bPowX[k] * x) is D, then + // degree of (bPowX[k] * x - P) is less than D. + bPowX[k + 1] = (bPowX[k] >> 1) ^ P; + } + else { + bPowX[k + 1] = bPowX[k] >> 1; + } + } + return product; + } + + // A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9 + void InitPow2k(void) { + // Pow2k(0) = + // x^(2^k) mod P(x) = x mod P(x) = x + // Since we are operating on a reflected values + // x = 10b, reflect(x) = 0x40000000 + Pow2k[0] = 0x40000000; + + for (int k = 1; k < TILL_CYCLE; k++) { + // Pow2k(k+1) = Pow2k(k-1)^2 mod P(x) + uint32_t tmp = Pow2k[k - 1]; + Pow2k[k] = Multiply(tmp, tmp); + } + } + + // x^N mod P(x) + uint32_t FPowN(uint32_t n) { + // result = 1 (polynomial) + uint32_t one, result = 0x80000000, i = 0; + + while (one = (n & 1), (n == 1 || n - one > 0)) { + if (one) { + result = Multiply(result, Pow2k[i]); + } + n >>= 1; + i++; + } + + return result; + } +} + +juint *StubRoutines::x86::_crc32c_table; + +void StubRoutines::x86::GenerateCRC32CTable(bool IsPclmulqdqSupported) { + using namespace CRC32C; + + static juint PowN[NUM_PRECOMPUTED_CONSTANTS]; + + InitPow2k(); + + PowN[0] = FPowN(HIGH * 8); // 8N * 8 = 64N + PowN[1] = FPowN(HIGH * 8 * 2); // 128N + + PowN[2] = FPowN(MIDDLE * 8); + PowN[3] = FPowN(MIDDLE * 8 * 2); + + PowN[4] = FPowN(LOW * 8); + PowN[NUM_PRECOMPUTED_CONSTANTS - 1] = + FPowN(LOW * 8 * 2); + + if 
(IsPclmulqdqSupported) { + _crc32c_table = PowN; + } else { + static julong PCLMULQDQ[NUM_PRECOMPUTED_CONSTANTS * 256]; + + for (int j = 0; j < NUM_PRECOMPUTED_CONSTANTS; j++) { + CONST = PowN[j]; + for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations + // S. Gueron / Information Processing Letters 112 (2012) 184 + // Algorithm 3: Generating a carry-less multiplication lookup table. + // Input: A 32-bit constant, CONST. + // Output: A table of 256 entries, each one is a 64-bit quadword, + // that can be used for computing "byte" * CONST, for a given byte. + PCLMULQDQ[j * 256 + i] = + ((i & 1) * CONST) ^ ((i & 2) * CONST) ^ ((i & 4) * CONST) ^ + ((i & 8) * CONST) ^ ((i & 16) * CONST) ^ ((i & 32) * CONST) ^ + ((i & 64) * CONST) ^ ((i & 128) * CONST); + } + } + _crc32c_table = (juint*)PCLMULQDQ; + } +} --- old/src/cpu/x86/vm/stubRoutines_x86.hpp 2015-08-26 13:22:28.525600500 -0700 +++ new/src/cpu/x86/vm/stubRoutines_x86.hpp 2015-08-26 13:22:28.239571900 -0700 @@ -36,6 +36,8 @@ // masks and table for CRC32 static uint64_t _crc_by128_masks[]; static juint _crc_table[]; + // table for CRC32C + static juint* _crc32c_table; // swap mask for ghash static address _ghash_long_swap_mask_addr; static address _ghash_byte_swap_mask_addr; @@ -46,5 +48,6 @@ static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } + static void GenerateCRC32CTable(bool IsPclmulqdqSupported); #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP --- old/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2015-08-26 13:22:30.226770600 -0700 +++ new/src/cpu/x86/vm/templateInterpreter_x86_32.cpp 2015-08-26 13:22:29.961744100 -0700 @@ -809,18 +809,25 @@ const Register buf = rdx; // source java byte array address const Register len = rdi; // length + // value x86_32 + // interp. 
arg ptr ESP + 4 + // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + // 3 2 1 0 + // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + // 4 2,3 1 0 + // Arguments are reversed on java expression stack - __ movl(len, Address(rsp, wordSize)); // Length + __ movl(len, Address(rsp, 4 + 0)); // Length // Calculate address of start element if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ movptr(buf, Address(rsp, 3*wordSize)); // long buf - __ addptr(buf, Address(rsp, 2*wordSize)); // + offset - __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC } else { - __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 2*wordSize)); // + offset - __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC } __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); @@ -838,6 +845,53 @@ return entry; } + return generate_native_entry(false); +} + +/** +* Method entry for static native methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) +*/ +address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + // Load parameters + const Register crc = rax; // crc + const Register buf = rcx; // source java byte array address + const Register len = rdx; // length + const Register end 
= len; + + // value x86_32 + // interp. arg ptr ESP + 4 + // int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + // 3 2 1 0 + // int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) + // 4 2,3 1 0 + + // Arguments are reversed on java expression stack + __ movl(end, Address(rsp, 4 + 0)); // end + __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC + } + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); + // result in rax + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry; + } return generate_native_entry(false); } --- old/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2015-08-26 13:22:32.024950400 -0700 +++ new/src/cpu/x86/vm/templateInterpreter_x86_64.cpp 2015-08-26 13:22:31.770925000 -0700 @@ -804,6 +804,57 @@ return generate_native_entry(false); } +/** +* Method entry for static native methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) +*/ +address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = 
c_rarg1; // source java byte array address + const Register len = c_rarg2; + const Register off = c_rarg3; // offset + const Register end = len; + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ movptr(buf, Address(rsp, 3 * wordSize)); // long buf + __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC + // Note on 5 * wordSize vs. 4 * wordSize: + // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) + // 4 2,3 1 0 + // end starts at SP + 8 + // The Java Virtual Machine Specification, Java SE 7 Edition + // 4.10.2.3. Values of Types long and double + // "When calculating operand stack length, values of type long and double have length two." + } else { + __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC + } + __ movl(end, Address(rsp, wordSize)); // end + __ subl(end, off); // end - off + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); + // result in rax + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, r13); // set sp to sender sp + __ jmp(rdi); + + return entry; + } + + return generate_native_entry(false); +} + // Interpreter stub for calling a native method. (asm interpreter) // This sets up a somewhat different looking stack for calling the // native method than the typical interpreter frame setup. 
--- old/src/cpu/x86/vm/vm_version_x86.cpp 2015-08-26 13:22:33.742122100 -0700 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2015-08-26 13:22:33.492097100 -0700 @@ -665,6 +665,18 @@ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); } + if (supports_sse4_2()) { + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + UseCRC32CIntrinsics = true; + } + } + else if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + warning("CRC32C intrinsics are not available on this CPU"); + } + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // The AES intrinsic stubs require AES instruction support (of course) // but also require sse3 mode for instructions it use. if (UseAES && (UseSSE > 2)) { @@ -699,12 +711,6 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } - if (UseCRC32CIntrinsics) { - if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) - warning("CRC32C intrinsics are not available on this CPU"); - FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); - } - // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag --- old/src/cpu/zero/vm/interpreterGenerator_zero.hpp 2015-08-26 13:22:35.320279900 -0700 +++ new/src/cpu/zero/vm/interpreterGenerator_zero.hpp 2015-08-26 13:22:35.070254900 -0700 @@ -42,4 +42,5 @@ // Not supported address generate_CRC32_update_entry() { return NULL; } address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } #endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP --- old/src/share/vm/classfile/vmSymbols.hpp 2015-08-26 13:22:36.933441200 -0700 +++ new/src/share/vm/classfile/vmSymbols.hpp 2015-08-26 13:22:36.676415500 -0700 @@ -865,9 +865,9 @@ \ /* support for java.util.zip.CRC32C */ \ do_class(java_util_zip_CRC32C, "java/util/zip/CRC32C") \ - do_intrinsic(_updateBytesCRC32C, java_util_zip_CRC32C, updateBytes_name, 
updateBytes_signature, F_S) \ - do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \ - do_name( updateDirectByteBuffer_name, "updateDirectByteBuffer") \ + do_intrinsic(_updateBytesCRC32C, java_util_zip_CRC32C, updateBytes_name, updateBytes_signature, F_S) \ + do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \ + do_name(updateDirectByteBuffer_name, "updateDirectByteBuffer") \ \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ --- old/src/share/vm/interpreter/abstractInterpreter.hpp 2015-08-26 13:22:38.538601700 -0700 +++ new/src/share/vm/interpreter/abstractInterpreter.hpp 2015-08-26 13:22:38.288576700 -0700 @@ -90,6 +90,8 @@ java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update() java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes() java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() + java_util_zip_CRC32C_updateBytes, // implementation of java.util.zip.CRC32C.updateBytes(crc, b[], off, end) + java_util_zip_CRC32C_updateDirectByteBuffer, // implementation of java.util.zip.CRC32C.updateDirectByteBuffer(crc, address, off, end) number_of_method_entries, invalid = -1 }; --- old/src/share/vm/interpreter/interpreter.cpp 2015-08-26 13:22:40.114759300 -0700 +++ new/src/share/vm/interpreter/interpreter.cpp 2015-08-26 13:22:39.864734300 -0700 @@ -232,6 +232,13 @@ case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer; } } + if (UseCRC32CIntrinsics) { + // Use optimized stub code for CRC32C methods. + switch (m->intrinsic_id()) { + case vmIntrinsics::_updateBytesCRC32C : return java_util_zip_CRC32C_updateBytes; + case vmIntrinsics::_updateDirectByteBufferCRC32C : return java_util_zip_CRC32C_updateDirectByteBuffer; + } + } #endif // Native method? 
@@ -339,6 +346,8 @@ case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break; case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break; case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; + case java_util_zip_CRC32C_updateBytes : tty->print("java_util_zip_CRC32C_updateBytes"); break; + case java_util_zip_CRC32C_updateDirectByteBuffer: tty->print("java_util_zip_CRC32C_updateDirectByteBuffer"); break; default: if (kind >= method_handle_invoke_FIRST && kind <= method_handle_invoke_LAST) { @@ -557,6 +566,11 @@ case Interpreter::java_util_zip_CRC32_updateBytes : // fall thru case Interpreter::java_util_zip_CRC32_updateByteBuffer : entry_point = generate_CRC32_updateBytes_entry(kind); break; + case Interpreter::java_util_zip_CRC32C_updateBytes + : // fall thru + case Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer + : entry_point = generate_CRC32C_updateBytes_entry(kind); break; + #endif // CC_INTERP default: fatal(err_msg("unexpected method kind: %d", kind)); --- old/src/share/vm/interpreter/templateInterpreter.cpp 2015-08-26 13:22:41.694917300 -0700 +++ new/src/share/vm/interpreter/templateInterpreter.cpp 2015-08-26 13:22:41.433891200 -0700 @@ -390,6 +390,11 @@ method_entry(java_util_zip_CRC32_updateByteBuffer) } + if (UseCRC32CIntrinsics) { + method_entry(java_util_zip_CRC32C_updateBytes) + method_entry(java_util_zip_CRC32C_updateDirectByteBuffer) + } + initialize_method_handle_entries(); // all native method kinds (must be one contiguous block) --- old/src/share/vm/runtime/stubRoutines.cpp 2015-08-26 13:22:43.281075900 -0700 +++ new/src/share/vm/runtime/stubRoutines.cpp 2015-08-26 13:22:43.029050700 -0700 @@ -135,8 +135,9 @@ address StubRoutines::_sha512_implCompressMB = NULL; address StubRoutines::_updateBytesCRC32 = NULL; -address StubRoutines::_crc_table_adr = NULL; +address StubRoutines::_crc_table_adr = NULL; +address StubRoutines::_crc32c_table_addr = NULL; address 
StubRoutines::_updateBytesCRC32C = NULL; address StubRoutines::_multiplyToLen = NULL; --- old/src/share/vm/runtime/stubRoutines.hpp 2015-08-26 13:22:44.890236800 -0700 +++ new/src/share/vm/runtime/stubRoutines.hpp 2015-08-26 13:22:44.638211600 -0700 @@ -197,6 +197,7 @@ static address _updateBytesCRC32; static address _crc_table_adr; + static address _crc32c_table_addr; static address _updateBytesCRC32C; static address _multiplyToLen; @@ -361,6 +362,7 @@ static address updateBytesCRC32() { return _updateBytesCRC32; } static address crc_table_addr() { return _crc_table_adr; } + static address crc32c_table_addr() { return _crc32c_table_addr; } static address updateBytesCRC32C() { return _updateBytesCRC32C; } static address multiplyToLen() {return _multiplyToLen; } --- old/src/share/vm/runtime/vmStructs.cpp 2015-08-26 13:22:46.480395800 -0700 +++ new/src/share/vm/runtime/vmStructs.cpp 2015-08-26 13:22:46.220369800 -0700 @@ -830,6 +830,7 @@ static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ + static_field(StubRoutines, _crc32c_table_addr, address) \ static_field(StubRoutines, _updateBytesCRC32C, address) \ static_field(StubRoutines, _multiplyToLen, address) \ static_field(StubRoutines, _squareToLen, address) \ --- /dev/null 2015-08-26 13:22:48.000000000 -0700 +++ new/src/cpu/x86/vm/crc32c.h 2015-08-26 13:22:47.906538400 -0700 @@ -0,0 +1,66 @@ +/* +* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. 
+* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. +* +*/ + +enum { + // S. Gueron / Information Processing Letters 112 (2012) 184 + // shows that anything above 6K and below 32K is a good choice + // 32K does not deliver any further performance gains + // 6K=8*256 (*3 as we compute 3 blocks together) + // + // Thus selecting the smallest value so it could apply to the largest number + // of buffer sizes. + HIGH = 8 * 256, + + // empirical + // based on ubench study using methodology described in + // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8 + // + // arbitrary value between 27 and 256 + MIDDLE = 8 * 86, + + // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9 + // shows that 240 and 1024 are equally good choices as the 216==8*27 + // + // Selecting the smallest value which resulted in a significant performance improvement over + // sequential version + LOW = 8 * 27, + + NUM_ChunkSizeInBytes = 3 +}; +// Notes: +// 1. Why do we need to choose a "chunk" approach? +// Overhead of computing the required powers for an arbitrary buffer of size N is significant +// (implementation approaches a library perf.) +// 2. Why only 3 "chunks"? 
+// Performance experiment results showed that a HIGH+LOW combination was not delivering a stable speedup +// curve. +// +// Disclaimer: +// If you ever decide to increase/decrease the number of "chunks", be sure to modify +// a) constants table generation (src/cpu/x86/vm/stubRoutines_x86.cpp) +// b) constant fetch from that table (macroAssembler_x86.cpp) +// c) unrolled for loop (macroAssembler_x86.cpp) + +// We need to compute powers of 64N and 128N for each "chunk" size +enum { NUM_PRECOMPUTED_CONSTANTS = 2 * NUM_ChunkSizeInBytes };