< prev index next >
src/cpu/s390/vm/macroAssembler_s390.cpp
Print this page
rev 12672 : [mq]: crc32_s390.patch
*** 1,8 ****
/*
! * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
--- 1,8 ----
/*
! * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 5908,5942 ****
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
* @param len register containing number of bytes
* @param table register pointing to CRC table
*/
! void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
! Register data, bool invertCRC) {
// Emits a loop that folds len bytes, starting at buf, into crc one byte at a time.
// When invertCRC is set, crc is complemented before and after the loop. Both
// complements sit between the len test and L_done, so they are skipped together
// when the test branches out.
// Register effects: buf is advanced past the processed bytes, len is counted down,
// and data is overwritten with each input byte.
assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done;
const int mainLoop_stepping = 1;
// Process all bytes in a single-byte loop.
// Set the condition code from len, then skip everything if len is not positive.
z_ltr(len, len);
z_brnh(L_done);
- if (invertCRC) {
- not_(crc, noreg, false); // ~c
- }
-
bind(L_mainLoop);
z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
add2reg(buf, mainLoop_stepping); // Advance buffer position.
// Fold the byte held in data into crc. The helper is defined outside this view;
// presumably it does a lookup in table - TODO confirm.
update_byte_crc32(crc, data, table);
// Branch on count: decrement len and loop while it is non-zero.
z_brct(len, L_mainLoop); // Iterate.
- if (invertCRC) {
- not_(crc, noreg, false); // ~c
- }
-
bind(L_done);
}
/**
* Emits code to update CRC-32 with a 4-byte value according to constants in table.
--- 5908,5933 ----
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
* @param len register containing number of bytes
* @param table register pointing to CRC table
*/
! void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
// Emits a loop that folds len bytes, starting at buf, into crc one byte at a time.
// No complement of crc is emitted here; callers that need it wrap this call with
// not_() themselves (kernel_crc32_1byte does so when its invertCRC flag is set).
// Register effects: buf is advanced past the processed bytes, len is counted down,
// and data is overwritten with each input byte.
assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done;
const int mainLoop_stepping = 1;
// Process all bytes in a single-byte loop.
// Set the condition code from len, then skip the loop if len is not positive.
z_ltr(len, len);
z_brnh(L_done);
bind(L_mainLoop);
z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
add2reg(buf, mainLoop_stepping); // Advance buffer position.
// Fold the byte held in data into crc. The helper is defined outside this view;
// presumably it does a lookup in table - TODO confirm.
update_byte_crc32(crc, data, table);
// Branch on count: decrement len and loop while it is non-zero.
z_brct(len, L_mainLoop); // Iterate.
bind(L_done);
}
/**
* Emits code to update CRC-32 with a 4-byte value according to constants in table.
*** 5949,5958 ****
--- 5940,5950 ----
//
// #define DOBIG4 c ^= *++buf4; \
// c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
// crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
// #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+ // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
*** 5967,5987 ****
rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
! // Load pre-calculated table values.
! // Use columns 4..7 for big-endian.
! z_ly(t3, Address(table, t3, (intptr_t)ix0));
z_ly(t2, Address(table, t2, (intptr_t)ix1));
- z_ly(t1, Address(table, t1, (intptr_t)ix2));
z_ly(t0, Address(table, t0, (intptr_t)ix3));
!
! // Calculate new crc from table values.
! z_xr(t2, t3);
! z_xr(t0, t1);
! z_xr(t0, t2); // Now crc contains the final checksum value.
lgr_if_needed(crc, t0);
}
/**
* @param crc register containing existing CRC (32-bit)
--- 5959,5974 ----
rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2
rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2
rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
! // XOR indexed table values to calculate updated crc.
z_ly(t2, Address(table, t2, (intptr_t)ix1));
z_ly(t0, Address(table, t0, (intptr_t)ix3));
! z_xy(t2, Address(table, t3, (intptr_t)ix0));
! z_xy(t0, Address(table, t1, (intptr_t)ix2));
! z_xr(t0, t2); // Now t0 contains the updated CRC value.
lgr_if_needed(crc, t0);
}
/**
* @param crc register containing existing CRC (32-bit)
*** 5990,6000 ****
* @param table register pointing to CRC table
*
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
Register data = t0;
Register ctr = Z_R0;
--- 5977,5988 ----
* @param table register pointing to CRC table
*
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3,
! bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
Register data = t0;
Register ctr = Z_R0;
*** 6005,6015 ****
--- 5993,6005 ----
// Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
// The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping).
+ if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
+ }
#if 0
{
// Pre-mainLoop alignment did not show any positive effect on performance.
// We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.
*** 6020,6030 ****
// Align buf to word (4-byte) boundary.
z_lcr(ctr, buf);
rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
z_sgfr(len, ctr); // Remaining len after alignment.
! update_byteLoop_crc32(crc, buf, ctr, table, data, false);
}
#endif
// Check for short (<mainLoop_stepping bytes) buffer.
z_srag(ctr, len, log_stepping);
--- 6010,6020 ----
// Align buf to word (4-byte) boundary.
z_lcr(ctr, buf);
rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
z_sgfr(len, ctr); // Remaining len after alignment.
! update_byteLoop_crc32(crc, buf, ctr, table, data);
}
#endif
// Check for short (<mainLoop_stepping bytes) buffer.
z_srag(ctr, len, log_stepping);
*** 6040,6052 ****
z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<8) bytes of buffer.
BIND(L_tail);
! update_byteLoop_crc32(crc, buf, len, table, data, false);
not_(crc, noreg, false); // 1s complement of crc
}
/**
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
--- 6030,6044 ----
z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<8) bytes of buffer.
BIND(L_tail);
! update_byteLoop_crc32(crc, buf, len, table, data);
+ if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
+ }
}
/**
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
*** 6054,6064 ****
* @param table register pointing to CRC table
*
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
Register data = t0;
Register ctr = Z_R0;
--- 6046,6057 ----
* @param table register pointing to CRC table
*
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3,
! bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
Register data = t0;
Register ctr = Z_R0;
*** 6068,6078 ****
--- 6061,6073 ----
// Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
// The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping).
+ if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
+ }
// Check for short (<4 bytes) buffer.
z_srag(ctr, len, log_stepping);
z_brnh(L_tail);
*** 6080,6121 ****
rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
BIND(L_mainLoop);
update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
z_brct(ctr, L_mainLoop); // Iterate.
z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<4) bytes of buffer.
BIND(L_tail);
! update_byteLoop_crc32(crc, buf, len, table, data, false);
not_(crc, noreg, false); // 1s complement of crc
}
/**
* Byte-at-a-time CRC-32 kernel. Delegates entirely to update_byteLoop_crc32
* with invertCRC = true.
*
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
* @param len register containing number of bytes
* @param table register pointing to CRC table
* @param t0 scratch register, receives each input byte (passed on as 'data')
* @param t1 not used by this variant
* @param t2 not used by this variant
* @param t3 not used by this variant
*/
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3) {
assert_different_registers(crc, buf, len, table);
// Only t0 is needed; it serves as the per-byte data register of the loop.
Register data = t0;
! update_byteLoop_crc32(crc, buf, len, table, data, true);
}
! void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
// Updates crc with the single byte located at address buf. crc is complemented
// before and after the update. buf is not advanced. len only takes part in the
// register-distinctness assert and is not otherwise read. tmp is overwritten
// with the loaded byte.
assert_different_registers(crc, buf, len, table, tmp);
! not_(crc, noreg, false); // ~c
z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
update_byte_crc32(crc, tmp, table);
! not_(crc, noreg, false); // ~c
}
//
// Code for BigInteger::multiplyToLen() intrinsic.
//
--- 6075,6148 ----
rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
BIND(L_mainLoop);
update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
z_brct(ctr, L_mainLoop); // Iterate.
+
z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<4) bytes of buffer.
BIND(L_tail);
! update_byteLoop_crc32(crc, buf, len, table, data);
+ if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
+ }
}
/**
* Byte-at-a-time CRC-32 kernel. Runs update_byteLoop_crc32 over the buffer,
* optionally wrapped in a 1s complement of crc before and after.
*
* @param crc register containing existing CRC (32-bit)
* @param buf register pointing to input byte buffer (byte*)
* @param len register containing number of bytes
* @param table register pointing to CRC table
* @param t0 scratch register, receives each input byte (passed on as 'data')
* @param t1 not used by this variant
* @param t2 not used by this variant
* @param t3 not used by this variant
* @param invertCRC if true, emit a 1s complement of crc before and after the byte loop
*/
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
! Register t0, Register t1, Register t2, Register t3,
! bool invertCRC) {
assert_different_registers(crc, buf, len, table);
// Only t0 is needed; it serves as the per-byte data register of the loop.
Register data = t0;
// invertCRC is evaluated at code-generation time and only decides whether the
// not_() instructions are emitted. The complements are not guarded by a len
// test, so when the byte loop is skipped crc is complemented twice and ends
// up unchanged.
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
!
! update_byteLoop_crc32(crc, buf, len, table, data);
!
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
}
! void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
! bool invertCRC) {
// Updates crc with the single byte located at address buf. buf is not advanced.
// len only takes part in the register-distinctness assert and is not otherwise
// read. tmp is overwritten with the loaded byte.
// invertCRC is evaluated at code-generation time: when true, a 1s complement
// of crc is emitted before and after the update.
assert_different_registers(crc, buf, len, table, tmp);
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
update_byte_crc32(crc, tmp, table);
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
! }
!
! void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
! bool invertCRC) {
// Register-operand variant of kernel_crc32_singleByte: the byte to fold into
// crc is taken from val instead of being loaded from a buffer.
// NOTE(review): val is presumably expected to hold a zero-extended byte, like
// the llgc-loaded value in kernel_crc32_singleByte - confirm against
// update_byte_crc32 and the callers.
// invertCRC is evaluated at code-generation time: when true, a 1s complement
// of crc is emitted before and after the update.
! assert_different_registers(crc, val, table);
!
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
!
! update_byte_crc32(crc, val, table);
!
! if (invertCRC) {
! not_(crc, noreg, false); // 1s complement of crc
! }
}
//
// Code for BigInteger::multiplyToLen() intrinsic.
//
< prev index next >