< prev index next >
src/hotspot/cpu/ppc/stubRoutines_ppc_64.cpp
Print this page
rev 53302 : 8216060: [PPC64] Vector CRC implementation should be used by interpreter and be faster for short arrays
Reviewed-by: gromero, goetz
*** 1,8 ****
/*
! * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2012, 2018, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
--- 1,8 ----
/*
! * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2012, 2019, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 40,73 ****
// Loads the address of the CRC32 constants table (StubRoutines::ppc64::_crc_constants)
// into register `table` through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc_constants_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_constants, R0);
}
- void StubRoutines::ppc64::generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table) {
- __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_barret_constants, R0);
- }
-
// Loads the value of StubRoutines::_crc32c_table_addr into register `table`
// through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc32c_table_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, StubRoutines::_crc32c_table_addr, R0);
}
// Loads the address of the CRC32C constants table (StubRoutines::ppc64::_crc32c_constants)
// into register `table` through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc32c_constants_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_constants, R0);
}
! void StubRoutines::ppc64::generate_load_crc32c_barret_constants_addr(MacroAssembler* masm, Register table) {
! __ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_barret_constants, R0);
! }
!
! // CRC constants and compute functions
! #define REVERSE_CRC32_POLY 0xEDB88320
! #define REVERSE_CRC32C_POLY 0x82F63B78
! #define INVERSE_REVERSE_CRC32_POLY 0x1aab14226ull
! #define INVERSE_REVERSE_CRC32C_POLY 0x105fd79bdull
! #define UNROLL_FACTOR 2048
! #define UNROLL_FACTOR2 8
!
static juint fold_word(juint w, juint reverse_poly) {
for (int i = 0; i < 32; i++) {
int poly_if_odd = (-(w & 1)) & reverse_poly;
w = (w >> 1) ^ poly_if_odd;
}
--- 40,58 ----
// Loads the address of the CRC32 constants table (StubRoutines::ppc64::_crc_constants)
// into register `table` through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc_constants_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, (address)StubRoutines::ppc64::_crc_constants, R0);
}
// Loads the value of StubRoutines::_crc32c_table_addr into register `table`
// through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc32c_table_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, StubRoutines::_crc32c_table_addr, R0);
}
// Loads the address of the CRC32C constants table (StubRoutines::ppc64::_crc32c_constants)
// into register `table` through the assembler's load_const_optimized.
// R0 is passed as the third argument (presumably a scratch register - confirm
// against MacroAssembler::load_const_optimized).
void StubRoutines::ppc64::generate_load_crc32c_constants_addr(MacroAssembler* masm, Register table) {
__ load_const_optimized(table, (address)StubRoutines::ppc64::_crc32c_constants, R0);
}
! // CRC constant compute functions
static juint fold_word(juint w, juint reverse_poly) {
for (int i = 0; i < 32; i++) {
int poly_if_odd = (-(w & 1)) & reverse_poly;
w = (w >> 1) ^ poly_if_odd;
}
*** 96,112 ****
return div;
}
// Constants to fold n words as needed by macroAssembler.
juint* StubRoutines::ppc64::generate_crc_constants(juint reverse_poly) {
! juint* ptr = (juint*) malloc(sizeof(juint) * 4 * (UNROLL_FACTOR2 - 1 + UNROLL_FACTOR / UNROLL_FACTOR2));
guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed");
guarantee(ptr != NULL, "allocation error of a crc table");
// Generate constants for outer loop
juint v0, v1, v2, v3 = 1;
! for (int i = 0; i < UNROLL_FACTOR2 - 1; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
#ifdef VM_LITTLE_ENDIAN
--- 81,97 ----
return div;
}
// Constants to fold n words as needed by macroAssembler.
juint* StubRoutines::ppc64::generate_crc_constants(juint reverse_poly) {
! juint* ptr = (juint*) malloc(sizeof(juint) * 4 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2));
guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed");
guarantee(ptr != NULL, "allocation error of a crc table");
// Generate constants for outer loop
juint v0, v1, v2, v3 = 1;
! for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
#ifdef VM_LITTLE_ENDIAN
*** 121,139 ****
ptr[4*i+3] = v3;
#endif
}
// Generate constants for inner loop
! juint* ptr2 = ptr + 4 * (UNROLL_FACTOR2 - 1);
v3 = 1; // Restart from scratch.
! for (int i = 0; i < UNROLL_FACTOR; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
! if (i % UNROLL_FACTOR2 == 0) {
! int idx = UNROLL_FACTOR / UNROLL_FACTOR2 - 1 - i / UNROLL_FACTOR2;
for (int j = 0; j < 4; ++j) {
#ifdef VM_LITTLE_ENDIAN
ptr2[4*idx ] = v3;
ptr2[4*idx+1] = v2;
ptr2[4*idx+2] = v1;
--- 106,124 ----
ptr[4*i+3] = v3;
#endif
}
// Generate constants for inner loop
! juint* ptr2 = ptr + 4 * (CRC32_UNROLL_FACTOR2 - 1);
v3 = 1; // Restart from scratch.
! for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
! if (i % CRC32_UNROLL_FACTOR2 == 0) {
! int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2;
for (int j = 0; j < 4; ++j) {
#ifdef VM_LITTLE_ENDIAN
ptr2[4*idx ] = v3;
ptr2[4*idx+1] = v2;
ptr2[4*idx+2] = v1;
*** 146,165 ****
#endif
}
}
}
! return ptr;
! }
!
! // Constants to reduce 64 to 32 bit as needed by macroAssembler.
! juint* StubRoutines::ppc64::generate_crc_barret_constants(juint reverse_poly) {
! juint* ptr = (juint*) malloc(sizeof(juint) * CRC32_BARRET_CONSTANTS);
! guarantee(((intptr_t)ptr & 0xF) == 0, "16-byte alignment needed");
! guarantee(ptr != NULL, "allocation error of a crc table");
!
! julong* c = (julong*)ptr;
julong long_poly = (((julong)reverse_poly) << 1) | 1;
julong inverse_long_poly = compute_inverse_poly(long_poly);
#ifdef VM_LITTLE_ENDIAN
c[0] = inverse_long_poly;
c[1] = long_poly;
--- 131,143 ----
#endif
}
}
}
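! // Constants to reduce 64 to 32 bit as needed by macroAssembler (formerly the
! // separate "barret" constants table). They are stored directly behind the
! // inner-loop constants in the same allocation.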
! juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
! julong* c = (julong*)ptr3;
julong long_poly = (((julong)reverse_poly) << 1) | 1;
julong inverse_long_poly = compute_inverse_poly(long_poly);
#ifdef VM_LITTLE_ENDIAN
c[0] = inverse_long_poly;
c[1] = long_poly;
*** 175,184 ****
--- 153,163 ----
assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity");
}
#endif
//printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly);
+
return ptr;
}
// CRC32 Intrinsics.
/**
*** 772,779 ****
#endif
};
// Constant tables for the vector CRC code, one per polynomial. Each pointer is
// initialized with the table returned by generate_crc_constants() for the
// respective reversed polynomial (CRC32 and CRC32C).
juint* StubRoutines::ppc64::_crc_constants = StubRoutines::ppc64::generate_crc_constants(REVERSE_CRC32_POLY);
juint* StubRoutines::ppc64::_crc32c_constants = StubRoutines::ppc64::generate_crc_constants(REVERSE_CRC32C_POLY);
-
- juint* StubRoutines::ppc64::_crc_barret_constants = StubRoutines::ppc64::generate_crc_barret_constants(REVERSE_CRC32_POLY);
- juint* StubRoutines::ppc64::_crc32c_barret_constants = StubRoutines::ppc64::generate_crc_barret_constants(REVERSE_CRC32C_POLY);
--- 751,755 ----
< prev index next >