< prev index next >

src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp

Print this page
rev 53302 : 8216060: [PPC64] Vector CRC implementation should be used by interpreter and be faster for short arrays
Reviewed-by: gromero, goetz

*** 1,8 **** /* ! * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2012, 2018, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. --- 1,8 ---- /* ! * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2012, 2019, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation.
*** 40,73 **** void StubRoutines::ppc64::generate_load_crc_constants_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_constants, R0); } - void StubRoutines::ppc64::generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table) { - __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_barret_constants, R0); - } - void StubRoutines::ppc64::generate_load_crc32c_table_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, StubRoutines::_crc32c_table_addr, R0); } void StubRoutines::ppc64::generate_load_crc32c_constants_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_constants, R0); } ! void StubRoutines::ppc64::generate_load_crc32c_barret_constants_addr(MacroAssembler* masm, Register table) { ! __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_barret_constants, R0); ! } ! ! // CRC constants and compute functions ! #define REVERSE_CRC32_POLY 0xEDB88320 ! #define REVERSE_CRC32C_POLY 0x82F63B78 ! #define INVERSE_REVERSE_CRC32_POLY 0x1aab14226ull ! #define INVERSE_REVERSE_CRC32C_POLY 0x105fd79bdull ! #define UNROLL_FACTOR 2048 ! #define UNROLL_FACTOR2 8 ! static juint fold_word(juint w, juint reverse_poly) { for (int i = 0; i < 32; i++) { int poly_if_odd = (-(w & 1)) & reverse_poly; w = (w >> 1) ^ poly_if_odd; } --- 40,58 ---- void StubRoutines::ppc64::generate_load_crc_constants_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_constants, R0); } void StubRoutines::ppc64::generate_load_crc32c_table_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, StubRoutines::_crc32c_table_addr, R0); } void StubRoutines::ppc64::generate_load_crc32c_constants_addr(MacroAssembler* masm, Register table) { __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_constants, R0); } ! // CRC constant compute functions static juint fold_word(juint w, juint reverse_poly) { for (int i = 0; i < 32; i++) { int poly_if_odd = (-(w & 1)) & reverse_poly; w = (w >> 1) ^ poly_if_odd; }
*** 96,112 **** return div; } // Constants to fold n words as needed by macroAssembler. juint* StubRoutines::ppc64::generate_crc_constants(juint reverse_poly) { ! juint* ptr = (juint*) malloc(sizeof(juint) * 4 * (UNROLL_FACTOR2 - 1 + UNROLL_FACTOR / UNROLL_FACTOR2)); guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed"); guarantee(ptr != NULL, "allocation error of a crc table"); // Generate constants for outer loop juint v0, v1, v2, v3 = 1; ! for (int i = 0; i < UNROLL_FACTOR2 - 1; ++i) { v0 = fold_word(v3, reverse_poly); v1 = fold_word(v0, reverse_poly); v2 = fold_word(v1, reverse_poly); v3 = fold_word(v2, reverse_poly); #ifdef VM_LITTLE_ENDIAN --- 81,97 ---- return div; } // Constants to fold n words as needed by macroAssembler. juint* StubRoutines::ppc64::generate_crc_constants(juint reverse_poly) { ! juint* ptr = (juint*) malloc(sizeof(juint) * 4 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2)); guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed"); guarantee(ptr != NULL, "allocation error of a crc table"); // Generate constants for outer loop juint v0, v1, v2, v3 = 1; ! for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) { v0 = fold_word(v3, reverse_poly); v1 = fold_word(v0, reverse_poly); v2 = fold_word(v1, reverse_poly); v3 = fold_word(v2, reverse_poly); #ifdef VM_LITTLE_ENDIAN
*** 121,139 **** ptr[4*i+3] = v3; #endif } // Generate constants for inner loop ! juint* ptr2 = ptr + 4 * (UNROLL_FACTOR2 - 1); v3 = 1; // Restart from scratch. ! for (int i = 0; i < UNROLL_FACTOR; ++i) { v0 = fold_word(v3, reverse_poly); v1 = fold_word(v0, reverse_poly); v2 = fold_word(v1, reverse_poly); v3 = fold_word(v2, reverse_poly); ! if (i % UNROLL_FACTOR2 == 0) { ! int idx = UNROLL_FACTOR / UNROLL_FACTOR2 - 1 - i / UNROLL_FACTOR2; for (int j = 0; j < 4; ++j) { #ifdef VM_LITTLE_ENDIAN ptr2[4*idx ] = v3; ptr2[4*idx+1] = v2; ptr2[4*idx+2] = v1; --- 106,124 ---- ptr[4*i+3] = v3; #endif } // Generate constants for inner loop ! juint* ptr2 = ptr + 4 * (CRC32_UNROLL_FACTOR2 - 1); v3 = 1; // Restart from scratch. ! for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) { v0 = fold_word(v3, reverse_poly); v1 = fold_word(v0, reverse_poly); v2 = fold_word(v1, reverse_poly); v3 = fold_word(v2, reverse_poly); ! if (i % CRC32_UNROLL_FACTOR2 == 0) { ! int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2; for (int j = 0; j < 4; ++j) { #ifdef VM_LITTLE_ENDIAN ptr2[4*idx ] = v3; ptr2[4*idx+1] = v2; ptr2[4*idx+2] = v1;
*** 146,165 **** #endif } } } ! return ptr; ! } ! ! // Constants to reduce 64 to 32 bit as needed by macroAssembler. ! juint* StubRoutines::ppc64::generate_crc_barret_constants(juint reverse_poly) { ! juint* ptr = (juint*) malloc(sizeof(juint) * CRC32_BARRET_CONSTANTS); ! guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed"); ! guarantee(ptr != NULL, "allocation error of a crc table"); ! ! julong* c = (julong*)ptr; julong long_poly = (((julong)reverse_poly) << 1) | 1; julong inverse_long_poly = compute_inverse_poly(long_poly); #ifdef VM_LITTLE_ENDIAN c[0] = inverse_long_poly; c[1] = long_poly; --- 131,143 ---- #endif } } } ! // Constants to reduce 64 to 32 bit as needed by macroAssembler. ! juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2); ! julong* c = (julong*)ptr3; julong long_poly = (((julong)reverse_poly) << 1) | 1; julong inverse_long_poly = compute_inverse_poly(long_poly); #ifdef VM_LITTLE_ENDIAN c[0] = inverse_long_poly; c[1] = long_poly;
*** 175,184 **** --- 153,163 ---- assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity"); } #endif //printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly); + return ptr; } // CRC32 Intrinsics. /**
*** 772,779 **** #endif }; juint* StubRoutines::ppc64::_crc_constants = StubRoutines::ppc64::generate_crc_constants(REVERSE_CRC32_POLY); juint* StubRoutines::ppc64::_crc32c_constants = StubRoutines::ppc64::generate_crc_constants(REVERSE_CRC32C_POLY); - - juint* StubRoutines::ppc64::_crc_barret_constants = StubRoutines::ppc64::generate_crc_barret_constants(REVERSE_CRC32_POLY); - juint* StubRoutines::ppc64::_crc32c_barret_constants = StubRoutines::ppc64::generate_crc_barret_constants(REVERSE_CRC32C_POLY); --- 751,755 ----
< prev index next >