< prev index next >

src/cpu/s390/vm/macroAssembler_s390.cpp

Print this page
rev 12672 : [mq]: crc32_s390.patch

*** 1,8 **** /* ! * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. --- 1,8 ---- /* ! * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2016, 2017, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation.
*** 5908,5942 **** * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*) * @param len register containing number of bytes * @param table register pointing to CRC table */ ! void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, ! Register data, bool invertCRC) { assert_different_registers(crc, buf, len, table, data); Label L_mainLoop, L_done; const int mainLoop_stepping = 1; // Process all bytes in a single-byte loop. z_ltr(len, len); z_brnh(L_done); - if (invertCRC) { - not_(crc, noreg, false); // ~c - } - bind(L_mainLoop); z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. add2reg(buf, mainLoop_stepping); // Advance buffer position. update_byte_crc32(crc, data, table); z_brct(len, L_mainLoop); // Iterate. - if (invertCRC) { - not_(crc, noreg, false); // ~c - } - bind(L_done); } /** * Emits code to update CRC-32 with a 4-byte value according to constants in table. --- 5908,5933 ---- * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*) * @param len register containing number of bytes * @param table register pointing to CRC table */ ! void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) { assert_different_registers(crc, buf, len, table, data); Label L_mainLoop, L_done; const int mainLoop_stepping = 1; // Process all bytes in a single-byte loop. z_ltr(len, len); z_brnh(L_done); bind(L_mainLoop); z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. add2reg(buf, mainLoop_stepping); // Advance buffer position. update_byte_crc32(crc, data, table); z_brct(len, L_mainLoop); // Iterate. bind(L_done); } /** * Emits code to update CRC-32 with a 4-byte value according to constants in table.
*** 5949,5958 **** --- 5940,5950 ---- // // #define DOBIG4 c ^= *++buf4; \ // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian. const int ix0 = 4*(4*CRC32_COLUMN_SIZE); const int ix1 = 5*(4*CRC32_COLUMN_SIZE); const int ix2 = 6*(4*CRC32_COLUMN_SIZE); const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
*** 5967,5987 **** rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 ! // Load pre-calculated table values. ! // Use columns 4..7 for big-endian. ! z_ly(t3, Address(table, t3, (intptr_t)ix0)); z_ly(t2, Address(table, t2, (intptr_t)ix1)); - z_ly(t1, Address(table, t1, (intptr_t)ix2)); z_ly(t0, Address(table, t0, (intptr_t)ix3)); ! ! // Calculate new crc from table values. ! z_xr(t2, t3); ! z_xr(t0, t1); ! z_xr(t0, t2); // Now crc contains the final checksum value. lgr_if_needed(crc, t0); } /** * @param crc register containing existing CRC (32-bit) --- 5959,5974 ---- rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 ! // XOR indexed table values to calculate updated crc. z_ly(t2, Address(table, t2, (intptr_t)ix1)); z_ly(t0, Address(table, t0, (intptr_t)ix3)); ! z_xy(t2, Address(table, t3, (intptr_t)ix0)); ! z_xy(t0, Address(table, t1, (intptr_t)ix2)); ! z_xr(t0, t2); // Now t0 contains the updated CRC value. lgr_if_needed(crc, t0); } /** * @param crc register containing existing CRC (32-bit)
*** 5990,6000 **** * @param table register pointing to CRC table * * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! */ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3) { assert_different_registers(crc, buf, len, table); Label L_mainLoop, L_tail; Register data = t0; Register ctr = Z_R0; --- 5977,5988 ---- * @param table register pointing to CRC table * * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! */ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3, ! bool invertCRC) { assert_different_registers(crc, buf, len, table); Label L_mainLoop, L_tail; Register data = t0; Register ctr = Z_R0;
*** 6005,6015 **** --- 5993,6005 ---- // Don't test for len <= 0 here. This pathological case should not occur anyway. // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. // The situation itself is detected and handled correctly by the conditional branches // following aghi(len, -stepping) and aghi(len, +stepping). + if (invertCRC) { not_(crc, noreg, false); // 1s complement of crc + } #if 0 { // Pre-mainLoop alignment did not show any positive effect on performance. // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment.
*** 6020,6030 **** // Align buf to word (4-byte) boundary. z_lcr(ctr, buf); rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc z_sgfr(len, ctr); // Remaining len after alignment. ! update_byteLoop_crc32(crc, buf, ctr, table, data, false); } #endif // Check for short (<mainLoop_stepping bytes) buffer. z_srag(ctr, len, log_stepping); --- 6010,6020 ---- // Align buf to word (4-byte) boundary. z_lcr(ctr, buf); rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc z_sgfr(len, ctr); // Remaining len after alignment. ! update_byteLoop_crc32(crc, buf, ctr, table, data); } #endif // Check for short (<mainLoop_stepping bytes) buffer. z_srag(ctr, len, log_stepping);
*** 6040,6052 **** z_lrvr(crc, crc); // Revert byte order back to original. // Process last few (<8) bytes of buffer. BIND(L_tail); ! update_byteLoop_crc32(crc, buf, len, table, data, false); not_(crc, noreg, false); // 1s complement of crc } /** * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*) --- 6030,6044 ---- z_lrvr(crc, crc); // Revert byte order back to original. // Process last few (<8) bytes of buffer. BIND(L_tail); ! update_byteLoop_crc32(crc, buf, len, table, data); + if (invertCRC) { not_(crc, noreg, false); // 1s complement of crc + } } /** * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*)
*** 6054,6064 **** * @param table register pointing to CRC table * * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! */ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3) { assert_different_registers(crc, buf, len, table); Label L_mainLoop, L_tail; Register data = t0; Register ctr = Z_R0; --- 6046,6057 ---- * @param table register pointing to CRC table * * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! */ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3, ! bool invertCRC) { assert_different_registers(crc, buf, len, table); Label L_mainLoop, L_tail; Register data = t0; Register ctr = Z_R0;
*** 6068,6078 **** --- 6061,6073 ---- // Don't test for len <= 0 here. This pathological case should not occur anyway. // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. // The situation itself is detected and handled correctly by the conditional branches // following aghi(len, -stepping) and aghi(len, +stepping). + if (invertCRC) { not_(crc, noreg, false); // 1s complement of crc + } // Check for short (<4 bytes) buffer. z_srag(ctr, len, log_stepping); z_brnh(L_tail);
*** 6080,6121 **** rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop BIND(L_mainLoop); update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); z_brct(ctr, L_mainLoop); // Iterate. z_lrvr(crc, crc); // Revert byte order back to original. // Process last few (<8) bytes of buffer. BIND(L_tail); ! update_byteLoop_crc32(crc, buf, len, table, data, false); not_(crc, noreg, false); // 1s complement of crc } /** * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*) * @param len register containing number of bytes * @param table register pointing to CRC table */ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3) { assert_different_registers(crc, buf, len, table); Register data = t0; ! update_byteLoop_crc32(crc, buf, len, table, data, true); } ! void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) { assert_different_registers(crc, buf, len, table, tmp); ! not_(crc, noreg, false); // ~c z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. update_byte_crc32(crc, tmp, table); ! not_(crc, noreg, false); // ~c } // // Code for BigInteger::multiplyToLen() intrinsic. // --- 6075,6148 ---- rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop BIND(L_mainLoop); update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); z_brct(ctr, L_mainLoop); // Iterate. + z_lrvr(crc, crc); // Revert byte order back to original. // Process last few (<8) bytes of buffer. BIND(L_tail); ! update_byteLoop_crc32(crc, buf, len, table, data); + if (invertCRC) { not_(crc, noreg, false); // 1s complement of crc + } } /** * @param crc register containing existing CRC (32-bit) * @param buf register pointing to input byte buffer (byte*) * @param len register containing number of bytes * @param table register pointing to CRC table */ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, ! Register t0, Register t1, Register t2, Register t3, ! bool invertCRC) { assert_different_registers(crc, buf, len, table); Register data = t0; ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } ! ! update_byteLoop_crc32(crc, buf, len, table, data); ! ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } } ! void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, ! bool invertCRC) { assert_different_registers(crc, buf, len, table, tmp); ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. update_byte_crc32(crc, tmp, table); ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } ! } ! ! void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, ! bool invertCRC) { ! assert_different_registers(crc, val, table); ! ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } ! ! update_byte_crc32(crc, val, table); ! ! if (invertCRC) { ! not_(crc, noreg, false); // 1s complement of crc ! } } // // Code for BigInteger::multiplyToLen() intrinsic. //
< prev index next >