--- old/src/cpu/aarch64/vm/vm_version_aarch64.cpp 2015-06-25 16:47:21.000000000 -0700 +++ new/src/cpu/aarch64/vm/vm_version_aarch64.cpp 2015-06-25 16:47:21.000000000 -0700 @@ -199,6 +199,12 @@ UseCRC32Intrinsics = true; } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { if (FLAG_IS_DEFAULT(UseSHA)) { FLAG_SET_DEFAULT(UseSHA, true); --- old/src/cpu/ppc/vm/vm_version_ppc.cpp 2015-06-25 16:47:22.000000000 -0700 +++ new/src/cpu/ppc/vm/vm_version_ppc.cpp 2015-06-25 16:47:21.000000000 -0700 @@ -191,6 +191,13 @@ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags. 
if (!has_tcheck() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag --- old/src/cpu/sparc/vm/assembler_sparc.hpp 2015-06-25 16:47:22.000000000 -0700 +++ new/src/cpu/sparc/vm/assembler_sparc.hpp 2015-06-25 16:47:22.000000000 -0700 @@ -128,8 +128,11 @@ faligndata_op3 = 0x36, flog3_op3 = 0x36, edge_op3 = 0x36, + fzero_op3 = 0x36, fsrc_op3 = 0x36, + fnot_op3 = 0x36, xmulx_op3 = 0x36, + crc32c_op3 = 0x36, impdep2_op3 = 0x37, stpartialf_op3 = 0x37, jmpl_op3 = 0x38, @@ -231,7 +234,9 @@ sha1_opf = 0x141, sha256_opf = 0x142, - sha512_opf = 0x143 + sha512_opf = 0x143, + + crc32c_opf = 0x147 }; enum op5s { @@ -600,6 +605,11 @@ return x & ((1 << 10) - 1); } + // create a low12 __value__ (not a field) for a given 32-bit constant + static int low12( int x ) { + return x & ((1 << 12) - 1); + } + // AES crypto instructions supported only on certain processors static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); } @@ -608,6 +618,9 @@ static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); } static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); } + // CRC32C instruction supported only on certain processors + static void crc32c_only() { assert( VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); } + // instruction only in VIS1 static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } @@ -1022,6 +1035,7 @@ void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); } + void sw_count() { emit_int32( op(branch_op) | op2(sethi_op2) | 0x3f0 ); } // pp 202 @@ -1198,8 +1212,14 @@ void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | 
opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); } + void fzero( FloatRegisterImpl::Width w, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fzero_op3) | opf(0x62 - w)); } + void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); } + void fnot1( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fnot_op3) | fs1(s1, w) | opf(0x6C - w)); } + + void fpmerge( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(0x36) | fs1(s1, FloatRegisterImpl::S) | opf(0x4b) | fs2(s2, FloatRegisterImpl::S)); } + void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); } // VIS2 instructions @@ -1224,6 +1244,10 @@ void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); } void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); } + // CRC32C instruction + + void crc32c( FloatRegister s1, FloatRegister s2, FloatRegister d ) { crc32c_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D)); } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY --- old/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2015-06-25 16:47:23.000000000 -0700 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2015-06-25 16:47:23.000000000 -0700 @@ -956,6 +956,7 @@ int hi = (int)(value >> 32); int lo = (int)(value & ~0); + int bits_33to2 = (int)((value >> 2) & ~0); // (Matcher::isSimpleConstant64 knows about the following optimizations.) 
if (Assembler::is_simm13(lo) && value == lo) { or3(G0, lo, d); @@ -964,6 +965,12 @@ if (low10(lo) != 0) or3(d, low10(lo), d); } + else if ((hi >> 2) == 0) { + Assembler::sethi(bits_33to2, d); // hardware version zero-extends to upper 32 + sllx(d, 2, d); + if (low12(lo) != 0) + or3(d, low12(lo), d); + } else if (hi == -1) { Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 xor3(d, low10(lo) ^ ~low10(~0), d); @@ -4351,3 +4358,52 @@ cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); nop(); // Separate short branches } + +/** + * Update CRC-32[C] with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xor3(val, crc, val); + and3(val, 0xFF, val); + sllx(val, 2, val); + lduw(table, val, val); + srlx(crc, 8, crc); + xor3(val, crc, crc); +} + +// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros +void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) { + srlx(src, 24, dst); + + sllx(src, 32+8, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 8, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+16, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 16, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+24, tmp); + srlx(tmp, 32, tmp); + or3(dst, tmp, dst); +} + +void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) { + reverse_bytes_32(src, tmp1, tmp2); + movxtod(tmp1, dst); +} + +void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2) { + movdtox(src, tmp1); + reverse_bytes_32(tmp1, dst, tmp2); +} --- old/src/cpu/sparc/vm/macroAssembler_sparc.hpp 
2015-06-25 16:47:24.000000000 -0700 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.hpp 2015-06-25 16:47:23.000000000 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -903,6 +903,10 @@ inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); + // little-endian + inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } + inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); } + // membar psuedo instruction. takes into account target memory model. inline void membar( Assembler::Membar_mask_bits const7a ); @@ -1436,6 +1440,14 @@ // Use BIS for zeroing void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); + // Update CRC-32[C] with a byte value according to constants in table + void update_byte_crc32(Register crc, Register val, Register table); + + // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros + void reverse_bytes_32(Register src, Register dst, Register tmp); + void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2); + void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2); + #undef VIRTUAL }; --- old/src/cpu/sparc/vm/stubGenerator_sparc.cpp 2015-06-25 16:47:24.000000000 -0700 +++ new/src/cpu/sparc/vm/stubGenerator_sparc.cpp 2015-06-25 16:47:24.000000000 -0700 @@ -4910,6 +4910,206 @@ return start; } +#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ +#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ +#define 
CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ +#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ + + /** + * Arguments: + * + * Inputs: + * O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // number of bytes + const Register table = O3; // byteTable + + Label L_crc32c_head, L_crc32c_aligned; + Label L_crc32c_parallel, L_crc32c_parallel_loop; + Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; + Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; + + __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); + + // clear upper 32 bits of crc + __ clruwu(crc); + + __ and3(buf, 7, G4); + __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); + + __ mov(8, G1); + __ sub(G1, G4, G4); + + // ------ process the misaligned head (7 bytes or less) ------ + __ BIND(L_crc32c_head); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); + + // ------ process the 8-byte-aligned body ------ + __ BIND(L_crc32c_aligned); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); + + // reverse the byte order of lower 32 bits to big endian, and move to FP side + __ movitof_revbytes(crc, F0, G1, G3); + + __ set(CHUNK_LEN*8*4, G4); + 
__ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); + + // ------ process four 1KB chunks in parallel ------ + __ BIND(L_crc32c_parallel); + + __ fzero(FloatRegisterImpl::D, F2); + __ fzero(FloatRegisterImpl::D, F4); + __ fzero(FloatRegisterImpl::D, F6); + + __ mov(CHUNK_LEN - 1, G4); + __ BIND(L_crc32c_parallel_loop); + // schedule ldf's ahead of crc32c's to hide the load-use latency + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + __ crc32c(F6, F14, F6); + __ inc(buf, 8); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); + + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + + __ inc(buf, CHUNK_LEN*24); + __ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian + __ inc(buf, 8); + + __ prefetch(buf, 0, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, O4, G1, G4); + __ movftoi_revbytes(F2, O5, G1, G4); + __ movftoi_revbytes(F4, G5, G1, G4); + + // combine the results of 4 chunks + __ set64(CHUNK_K1, G3, G1); + __ xmulx(O4, G3, O4); + __ set64(CHUNK_K2, G3, G1); + __ xmulx(O5, G3, O5); + __ set64(CHUNK_K3, G3, G1); + __ xmulx(G5, G3, G5); + + __ movdtox(F14, G4); + __ xor3(O4, O5, O5); + __ xor3(G5, O5, O5); + __ xor3(G4, O5, O5); + + // reverse the byte order to big endian, via stack, and move to FP 
side + __ add(SP, -8, G1); + __ srlx(G1, 3, G1); + __ sllx(G1, 3, G1); + __ stx(O5, G1, G0); + __ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian + + __ crc32c(F6, F2, F0); + + __ set(CHUNK_LEN*8*4, G4); + __ sub(len, G4, len); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); + __ nop(); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); + + __ BIND(L_crc32c_serial); + + __ mov(32, G4); + __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); + + // ------ process 32B chunks ------ + __ BIND(L_crc32c_x32_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 32); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); + + __ BIND(L_crc32c_x8); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); + + // ------ process 8B chunks ------ + __ BIND(L_crc32c_x8_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 8); + __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); + + __ BIND(L_crc32c_done); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, crc, G1, G3); + + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); + + // ------ process the misaligned tail (7 bytes or less) ------ + __ BIND(L_crc32c_tail); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::greater, 
Assembler::pt, L_crc32c_tail); + + __ BIND(L_crc32c_return); + __ nop(); + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -5001,6 +5201,11 @@ StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); } + + // generate CRC32C intrinsic code + if (UseCRC32CIntrinsics) { + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } } --- old/src/cpu/sparc/vm/stubRoutines_sparc.hpp 2015-06-25 16:47:25.000000000 -0700 +++ new/src/cpu/sparc/vm/stubRoutines_sparc.hpp 2015-06-25 16:47:25.000000000 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,7 @@ enum /* platform_dependent_constants */ { // %%%%%%%% May be able to shrink this a lot code_size1 = 20000, // simply increase if too small (assembler will crash if too small) - code_size2 = 23000 // simply increase if too small (assembler will crash if too small) + code_size2 = 24000 // simply increase if too small (assembler will crash if too small) }; class Sparc { --- old/src/cpu/sparc/vm/vm_version_sparc.cpp 2015-06-25 16:47:26.000000000 -0700 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp 2015-06-25 16:47:25.000000000 -0700 @@ -230,7 +230,7 @@ assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? 
", popc" : ""), (has_vis1() ? ", vis1" : ""), @@ -242,6 +242,7 @@ (has_sha1() ? ", sha1" : ""), (has_sha256() ? ", sha256" : ""), (has_sha512() ? ", sha512" : ""), + (has_crc32c() ? ", crc32c" : ""), (is_ultra3() ? ", ultra3" : ""), (is_sun4v() ? ", sun4v" : ""), (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), @@ -363,6 +364,23 @@ } } + // SPARC T4 and above should have support for CRC32C instruction + if (has_crc32c()) { + if (UseVIS > 2) { // CRC32C intrinsics use VIS3 instructions + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); + } + } else { + if (UseCRC32CIntrinsics) { + warning("SPARC CRC32C intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + } + } else if (UseCRC32CIntrinsics) { + warning("CRC32C instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; --- old/src/cpu/sparc/vm/vm_version_sparc.hpp 2015-06-25 16:47:26.000000000 -0700 +++ new/src/cpu/sparc/vm/vm_version_sparc.hpp 2015-06-25 16:47:26.000000000 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -53,7 +53,8 @@ aes_instructions = 19, sha1_instruction = 20, sha256_instruction = 21, - sha512_instruction = 22 + sha512_instruction = 22, + crc32c_instruction = 23 }; enum Feature_Flag_Set { @@ -83,6 +84,7 @@ sha1_instruction_m = 1 << sha1_instruction, sha256_instruction_m = 1 << sha256_instruction, sha512_instruction_m = 1 << sha512_instruction, + crc32c_instruction_m = 1 << crc32c_instruction, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -141,6 +143,7 @@ static bool has_sha1() { return (_features & sha1_instruction_m) != 0; } static bool has_sha256() { return (_features & sha256_instruction_m) != 0; } static bool has_sha512() { return (_features & sha512_instruction_m) != 0; } + static bool has_crc32c() { return (_features & crc32c_instruction_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); } --- old/src/cpu/x86/vm/vm_version_x86.cpp 2015-06-25 16:47:27.000000000 -0700 +++ new/src/cpu/x86/vm/vm_version_x86.cpp 2015-06-25 16:47:27.000000000 -0700 @@ -699,6 +699,12 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag --- old/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp 2015-06-25 16:47:27.000000000 -0700 +++ new/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp 2015-06-25 16:47:27.000000000 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -363,6 +363,11 @@ #endif if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; +#ifndef AV_SPARC_CRC32C +#define AV_SPARC_CRC32C 0x20000000 /* crc32c instruction supported */ +#endif + if (av & AV_SPARC_CRC32C) features |= crc32c_instruction_m; + #ifndef AV_SPARC_AES #define AV_SPARC_AES 0x00020000 /* aes instrs supported */ #endif --- old/src/share/vm/classfile/vmSymbols.hpp 2015-06-25 16:47:28.000000000 -0700 +++ new/src/share/vm/classfile/vmSymbols.hpp 2015-06-25 16:47:28.000000000 -0700 @@ -863,6 +863,12 @@ do_name( updateByteBuffer_name, "updateByteBuffer") \ do_signature(updateByteBuffer_signature, "(IJII)I") \ \ + /* support for java.util.zip.CRC32C */ \ + do_class(java_util_zip_CRC32C, "java/util/zip/CRC32C") \ + do_intrinsic(_updateBytesCRC32C, java_util_zip_CRC32C, updateBytes_name, updateBytes_signature, F_S) \ + do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \ + do_name( updateDirectByteBuffer_name, "updateDirectByteBuffer") \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \ --- old/src/share/vm/opto/escape.cpp 2015-06-25 16:47:29.000000000 -0700 +++ new/src/share/vm/opto/escape.cpp 2015-06-25 16:47:28.000000000 -0700 @@ -962,6 +962,7 @@ (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 || + strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || --- old/src/share/vm/opto/library_call.cpp 2015-06-25 16:47:29.000000000 -0700 +++ 
new/src/share/vm/opto/library_call.cpp 2015-06-25 16:47:29.000000000 -0700 @@ -197,7 +197,7 @@ CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) { return generate_method_call(method_id, true, false); } - Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static); + Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls); Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2); Node* make_string_method_node(int opcode, Node* str1, Node* str2); @@ -291,6 +291,9 @@ bool inline_updateCRC32(); bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); + Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class); + bool inline_updateBytesCRC32C(); + bool inline_updateDirectByteBufferCRC32C(); bool inline_multiplyToLen(); bool inline_squareToLen(); bool inline_mulAdd(); @@ -539,6 +542,11 @@ if (!UseCRC32Intrinsics) return NULL; break; + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + if (!UseCRC32CIntrinsics) return NULL; + break; + case vmIntrinsics::_incrementExactI: case vmIntrinsics::_addExactI: if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; @@ -947,6 +955,11 @@ case vmIntrinsics::_updateByteBufferCRC32: return inline_updateByteBufferCRC32(); + case vmIntrinsics::_updateBytesCRC32C: + return inline_updateBytesCRC32C(); + case vmIntrinsics::_updateDirectByteBufferCRC32C: + return inline_updateDirectByteBufferCRC32C(); + case vmIntrinsics::_profileBoolean: return inline_profileBoolean(); case vmIntrinsics::_isCompileConstant: @@ -5536,6 +5549,106 @@ return true; } +//------------------------------get_table_from_crc32c_class----------------------- +Node * LibraryCallKit::get_table_from_crc32c_class(ciInstanceKlass *crc32c_class) 
{ + Node* table = load_field_from_object(NULL, "byteTable", "[I", /*is_exact*/ false, /*is_static*/ true, crc32c_class); + assert (table != NULL, "wrong version of java.util.zip.CRC32C"); + + return table; +} + +//------------------------------inline_updateBytesCRC32C----------------------- +// +// Calculate CRC32C for byte[] array. +// int java.util.zip.CRC32C.updateBytes(int crc, byte[] buf, int off, int end) +// +bool LibraryCallKit::inline_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction support"); + assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters"); + assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: oop + Node* offset = argument(2); // type: int + Node* end = argument(3); // type: int + + Node* length = _gvn.transform(new SubINode(end, offset)); + + const Type* src_type = src->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + if (top_src == NULL || top_src->klass() == NULL) { + // failed array check + return false; + } + + // Figure out the size and type of the elements we will be copying. + BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (src_elem != T_BYTE) { + return false; + } + + // 'src_start' points to src array + scaled offset + Node* src_start = array_element_address(src, offset, src_elem); + + // static final int[] byteTable in class CRC32C + Node* table = get_table_from_crc32c_class(callee()->holder()); + Node* table_start = array_element_address(table, intcon(0), T_INT); + + // We assume that range check is done by caller. + // TODO: generate range check (offset+length < src.length) in debug VM. + + // Call the stub. 
+ address stubAddr = StubRoutines::updateBytesCRC32C(); + const char *stubName = "updateBytesCRC32C"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length, table_start); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + +//------------------------------inline_updateDirectByteBufferCRC32C----------------------- +// +// Calculate CRC32C for DirectByteBuffer. +// int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) +// +bool LibraryCallKit::inline_updateDirectByteBufferCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction support"); + assert(callee()->signature()->size() == 5, "updateDirectByteBuffer has 4 parameters and one is long"); + assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: long + Node* offset = argument(3); // type: int + Node* end = argument(4); // type: int + + Node* length = _gvn.transform(new SubINode(end, offset)); + + src = ConvL2X(src); // adjust Java long to machine word + Node* base = _gvn.transform(new CastX2PNode(src)); + offset = ConvI2X(offset); + + // 'src_start' points to the raw buffer address + byte offset + Node* src_start = basic_plus_adr(top(), base, offset); + + // static final int[] byteTable in class CRC32C + Node* table = get_table_from_crc32c_class(callee()->holder()); + Node* table_start = array_element_address(table, intcon(0), T_INT); + + // Call the stub. 
+ address stubAddr = StubRoutines::updateBytesCRC32C(); + const char *stubName = "updateBytesCRC32C"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length, table_start); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + //----------------------------inline_reference_get---------------------------- // public T java.lang.ref.Reference.get(); bool LibraryCallKit::inline_reference_get() { @@ -5571,18 +5684,28 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, - bool is_exact=true, bool is_static=false) { + bool is_exact=true, bool is_static=false, + ciInstanceKlass * fromKls=NULL) { + if (fromKls == NULL) { + const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); + fromKls = tinst->klass()->as_instance_klass(); + } else { + assert(is_static, "only for static field access"); + } + ciField* field = fromKls->get_field_by_name(ciSymbol::make(fieldName), + ciSymbol::make(fieldTypeString), + is_static); - const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); - assert(tinst != NULL, "obj is null"); - assert(tinst->klass()->is_loaded(), "obj is not loaded"); - assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); - - ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName), - ciSymbol::make(fieldTypeString), - is_static); - if (field == NULL) return (Node *) NULL; assert (field != NULL, "undefined field"); + if (field == NULL) return (Node *) NULL; + + if (is_static) { + const TypeInstPtr* tip = TypeInstPtr::make(fromKls->java_mirror()); + fromObj = makecon(tip); + } // Next code copied from Parse::do_get_xxx(): --- 
old/src/share/vm/opto/runtime.cpp 2015-06-25 16:47:30.000000000 -0700 +++ new/src/share/vm/opto/runtime.cpp 2015-06-25 16:47:30.000000000 -0700 @@ -851,6 +851,29 @@ return TypeFunc::make(domain, range); } +/** + * int updateBytesCRC32C(int crc, byte* buf, int len, int* table) + */ +const TypeFunc* OptoRuntime::updateBytesCRC32C_Type() { + // create input type (domain) + int num_args = 4; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypeInt::INT; // crc + fields[argp++] = TypePtr::NOTNULL; // buf + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypePtr::NOTNULL; // table + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { // create input type (domain) --- old/src/share/vm/opto/runtime.hpp 2015-06-25 16:47:31.000000000 -0700 +++ new/src/share/vm/opto/runtime.hpp 2015-06-25 16:47:31.000000000 -0700 @@ -319,6 +319,7 @@ static const TypeFunc* ghash_processBlocks_Type(); static const TypeFunc* updateBytesCRC32_Type(); + static const TypeFunc* updateBytesCRC32C_Type(); // leaf on stack replacement interpreter accessor types static const TypeFunc* osr_end_Type(); --- old/src/share/vm/runtime/globals.hpp 2015-06-25 16:47:31.000000000 -0700 +++ new/src/share/vm/runtime/globals.hpp 2015-06-25 16:47:31.000000000 -0700 @@ -848,6 +848,9 @@ product(bool, UseCRC32Intrinsics, false, \ "use intrinsics for java.util.zip.CRC32") \ \ + product(bool, UseCRC32CIntrinsics, false, \ + "use intrinsics for 
java.util.zip.CRC32C") \ + \ develop(bool, TraceCallFixup, false, \ "Trace all call fixups") \ \ --- old/src/share/vm/runtime/stubRoutines.cpp 2015-06-25 16:47:32.000000000 -0700 +++ new/src/share/vm/runtime/stubRoutines.cpp 2015-06-25 16:47:32.000000000 -0700 @@ -137,6 +137,8 @@ address StubRoutines::_updateBytesCRC32 = NULL; address StubRoutines::_crc_table_adr = NULL; +address StubRoutines::_updateBytesCRC32C = NULL; + address StubRoutines::_multiplyToLen = NULL; address StubRoutines::_squareToLen = NULL; address StubRoutines::_mulAdd = NULL; --- old/src/share/vm/runtime/stubRoutines.hpp 2015-06-25 16:47:33.000000000 -0700 +++ new/src/share/vm/runtime/stubRoutines.hpp 2015-06-25 16:47:32.000000000 -0700 @@ -197,6 +197,8 @@ static address _updateBytesCRC32; static address _crc_table_adr; + static address _updateBytesCRC32C; + static address _multiplyToLen; static address _squareToLen; static address _mulAdd; @@ -359,6 +361,8 @@ static address updateBytesCRC32() { return _updateBytesCRC32; } static address crc_table_addr() { return _crc_table_adr; } + static address updateBytesCRC32C() { return _updateBytesCRC32C; } + static address multiplyToLen() {return _multiplyToLen; } static address squareToLen() {return _squareToLen; } static address mulAdd() {return _mulAdd; } --- old/src/share/vm/runtime/vmStructs.cpp 2015-06-25 16:47:33.000000000 -0700 +++ new/src/share/vm/runtime/vmStructs.cpp 2015-06-25 16:47:33.000000000 -0700 @@ -830,6 +830,7 @@ static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ + static_field(StubRoutines, _updateBytesCRC32C, address) \ static_field(StubRoutines, _multiplyToLen, address) \ static_field(StubRoutines, _squareToLen, address) \ static_field(StubRoutines, _mulAdd, address) \ --- /dev/null 2015-06-25 16:47:34.000000000 -0700 +++ new/test/compiler/intrinsics/crc32c/TestCRC32C.java 2015-06-25 16:47:34.000000000 -0700 @@ 
/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 8073583
 * @summary C2 support for CRC32C on SPARC
 *
 * @run main/othervm/timeout=600 -Xbatch TestCRC32C -m
 */

import java.nio.ByteBuffer;
import java.util.zip.Checksum;
import java.util.zip.CRC32C;

/**
 * Exercises {@link CRC32C} over byte arrays and direct byte buffers.
 *
 * <p>Two modes are available:
 * <ul>
 *   <li>{@code -m [iters]}: computes a reference checksum for every
 *       (offset, size) pair in a fixed table, recomputes each checksum
 *       {@code iters} times, and compares the final values against the
 *       references.</li>
 *   <li>{@code [iters [warmupIters]]}: warms up, times and verifies a single
 *       message whose offset and size are taken from the {@code offset} and
 *       {@code msgSize} system properties.</li>
 * </ul>
 *
 * <p>In both modes a checksum mismatch makes the run fail with a
 * {@link RuntimeException}, so the harness sees a non-zero outcome.
 */
public class TestCRC32C {
    /** Horizontal rule printed between the sections of the single-message report. */
    private static final String SEPARATOR =
        "-------------------------------------------------------";

    /**
     * Entry point.
     *
     * @param args {@code -m [iters]} for the multi-case test, or
     *             {@code [iters [warmupIters]]} for the single-message run;
     *             both counts default to 20000
     * @throws IllegalArgumentException if the {@code offset} or {@code msgSize}
     *         system property is negative (single-message mode only)
     * @throws NumberFormatException if a count argument is not a decimal integer
     * @throws RuntimeException if any recomputed checksum differs from its reference
     */
    public static void main(String[] args) {
        int offset = Integer.getInteger("offset", 0);
        int msgSize = Integer.getInteger("msgSize", 512);
        boolean multi = false;
        int iters = 20000;
        int warmupIters = 20000;

        if (args.length > 0) {
            if (args[0].equals("-m")) {
                multi = true;
            } else {
                // parseInt yields the primitive directly; valueOf would box first.
                iters = Integer.parseInt(args[0]);
            }
            if (args.length > 1) {
                warmupIters = Integer.parseInt(args[1]);
            }
        }

        if (multi) {
            test_multi(warmupIters);
            return;
        }

        if (offset < 0 || msgSize < 0) {
            // Fail with a readable message instead of a NegativeArraySizeException
            // or IndexOutOfBoundsException from deep inside the run.
            throw new IllegalArgumentException(
                "offset and msgSize must be non-negative: offset=" + offset
                + ", msgSize=" + msgSize);
        }

        System.out.println(" offset = " + offset);
        System.out.println("msgSize = " + msgSize + " bytes");
        System.out.println(" iters = " + iters);

        byte[] b = initializedBytes(msgSize, offset);

        // Reference value, computed once before any warm-up loop runs.
        CRC32C crc0 = new CRC32C();
        crc0.update(b, offset, msgSize);

        System.out.println(SEPARATOR);
        boolean arrayOk = runByteArray(crc0, b, offset, msgSize, iters, warmupIters);
        System.out.println(SEPARATOR);
        boolean bufferOk = runByteBuffer(crc0, b, offset, msgSize, iters, warmupIters);
        System.out.println(SEPARATOR);

        // Both sections are always reported before the run is failed, so the
        // log shows which update() flavour produced the wrong value.
        if (!arrayOk || !bufferOk) {
            throw new RuntimeException("TEST FAILED: CRC32C mismatch (byte[] ok = "
                                       + arrayOk + ", ByteBuffer ok = " + bufferOk + ")");
        }
    }

    /**
     * Warms up, times and verifies {@code CRC32C.update(byte[], int, int)}.
     *
     * @return {@code true} if every verification pass matched {@code expected}
     */
    private static boolean runByteArray(Checksum expected, byte[] b, int offset,
                                        int msgSize, int iters, int warmupIters) {
        CRC32C crc = new CRC32C();

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc.reset();
            crc.update(b, offset, msgSize);
        }

        /* measure performance */
        long start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc.reset();
            crc.update(b, offset, msgSize);
        }
        long end = System.nanoTime();
        printTiming("CRC32C.update(byte[])", msgSize, iters, end - start);

        /* check correctness; stop at the first mismatch */
        boolean ok = true;
        for (int i = 0; i < iters && ok; i++) {
            crc.reset();
            crc.update(b, offset, msgSize);
            ok = check(expected, crc);
        }
        report("CRCs", expected, crc);
        return ok;
    }

    /**
     * Warms up, times and verifies {@code CRC32C.update(ByteBuffer)} on a
     * direct buffer holding the same {@code msgSize} bytes as the array run.
     *
     * @return {@code true} if every verification pass matched {@code expected}
     */
    private static boolean runByteBuffer(Checksum expected, byte[] b, int offset,
                                         int msgSize, int iters, int warmupIters) {
        CRC32C crc = new CRC32C();
        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
        buf.put(b, offset, msgSize);
        buf.flip();

        /* warm up */
        for (int i = 0; i < warmupIters; i++) {
            crc.reset();
            crc.update(buf);
            buf.rewind();   // update() consumed the buffer; make it readable again
        }

        /* measure performance */
        long start = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            crc.reset();
            crc.update(buf);
            buf.rewind();
        }
        long end = System.nanoTime();
        printTiming("CRC32C.update(ByteBuffer)", msgSize, iters, end - start);

        /* check correctness; stop at the first mismatch */
        boolean ok = true;
        for (int i = 0; i < iters && ok; i++) {
            crc.reset();
            crc.update(buf);
            buf.rewind();
            ok = check(expected, crc);
        }
        report("CRCs", expected, crc);
        return ok;
    }

    /** Prints elapsed time in seconds and throughput in MB/s for one timed loop. */
    private static void printTiming(String label, int msgSize, int iters, long elapsedNanos) {
        double total = (double) elapsedNanos / 1e9;               // in seconds
        double thruput = (double) msgSize * iters / 1e6 / total;  // in MB/s
        System.out.println(label + " runtime = " + total + " seconds");
        System.out.println(label + " throughput = " + thruput + " MB/s");
    }

    /** Prints both checksum values, labelled with {@code s}, on standard output. */
    private static void report(String s, Checksum crc0, Checksum crc1) {
        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
                          s, crc0.getValue(), crc1.getValue());
    }

    /**
     * Compares two checksums and logs an error line on standard error when
     * they differ.
     *
     * @return {@code true} if both checksums hold the same value
     */
    private static boolean check(Checksum crc0, Checksum crc1) {
        if (crc0.getValue() != crc1.getValue()) {
            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
                              crc0.getValue(), crc1.getValue());
            return false;
        }
        return true;
    }

    /**
     * Builds a test message of {@code M + offset} bytes: the first
     * {@code offset} bytes hold {@code (byte) i}, and the remaining {@code M}
     * bytes hold {@code (byte) (i - offset)}, i.e. the payload pattern restarts
     * at zero where the message proper begins.
     */
    private static byte[] initializedBytes(int M, int offset) {
        byte[] bytes = new byte[M + offset];
        for (int i = 0; i < offset; i++) {
            bytes[i] = (byte) i;
        }
        for (int i = offset; i < bytes.length; i++) {
            bytes[i] = (byte) (i - offset);
        }
        return bytes;
    }

    /**
     * Verifies CRC32C over a table of (offset, size) combinations.
     *
     * <p>A reference checksum is computed once per combination before the
     * repeated loop starts; each combination is then recomputed {@code iters}
     * times and its final value compared with the reference. Every mismatching
     * combination is printed before the test is failed.
     *
     * @param iters number of times each combination is recomputed
     * @throws RuntimeException if at least one combination mismatches
     */
    private static void test_multi(int iters) {
        int len1 = 8;    // the  8B/iteration loop
        int len2 = 32;   // the 32B/iteration loop
        int len3 = 4096; // the 4KB/iteration loop

        // Large enough for the biggest offset (512) plus the biggest size (len3*8).
        byte[] b = initializedBytes(len3*16, 0);
        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
                        len2, len2+1, len2+3, len2+5, len2+7,
                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
                        len3, len3+1, len3+3, len3+5, len3+7,
                        len3*2, len3*4, len3*8,
                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
                        len1+len2+len3+5, len1+len2+len3+7,
                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
        int cases = offsets.length*sizes.length;
        CRC32C[] crc0 = new CRC32C[cases];
        CRC32C[] crc1 = new CRC32C[cases];

        System.out.printf("testing %d cases ...\n", cases);

        /* set the result from interpreter as reference */
        for (int i = 0; i < offsets.length; i++) {
            for (int j = 0; j < sizes.length; j++) {
                int idx = i*sizes.length + j;
                crc0[idx] = new CRC32C();
                crc1[idx] = new CRC32C();
                crc0[idx].update(b, offsets[i], sizes[j]);
            }
        }

        /* warm up the JIT compiler and get result */
        for (int k = 0; k < iters; k++) {
            for (int i = 0; i < offsets.length; i++) {
                for (int j = 0; j < sizes.length; j++) {
                    int idx = i*sizes.length + j;
                    crc1[idx].reset();
                    crc1[idx].update(b, offsets[i], sizes[j]);
                }
            }
        }

        /* check correctness; list every failing case before failing the test */
        int failures = 0;
        for (int i = 0; i < offsets.length; i++) {
            for (int j = 0; j < sizes.length; j++) {
                int idx = i*sizes.length + j;
                if (!check(crc0[idx], crc1[idx])) {
                    failures++;
                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
                }
            }
        }
        if (failures != 0) {
            throw new RuntimeException("TEST FAILED: " + failures + " of " + cases
                                       + " CRC32C cases mismatched");
        }
    }
}