< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 9821 : 8146613: PPC64: C2 does no longer respect int to long conversion for stub calls
Reviewed-by: goetz
   1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2015 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


3639   const int ix0 = 3*(4*CRC32_COLUMN_SIZE);
3640   const int ix1 = 2*(4*CRC32_COLUMN_SIZE);
3641   const int ix2 = 1*(4*CRC32_COLUMN_SIZE);
3642   const int ix3 = 0*(4*CRC32_COLUMN_SIZE);
3643 #else
3644   // This is what we implement (the DOBIG4 part):
3645   // =========================================================================
3646   // #define DOBIG4 c ^= *++buf4; \
3647   //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
3648   //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
3649   // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
3650   // =========================================================================
3651   const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
3652   const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
3653   const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
3654   const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
3655 #endif
3656   assert_different_registers(table, tc0, tc1, tc2);
3657   assert(table == tc3, "must be!");
3658 
3659   if (ix0 != 0) addi(tc0, table, ix0);
3660   if (ix1 != 0) addi(tc1, table, ix1);
3661   if (ix2 != 0) addi(tc2, table, ix2);
3662   if (ix3 != 0) addi(tc3, table, ix3);
3663 
3664   return ix3;
3665 }
3666 
3667 /**
3668  * uint32_t crc;
3669  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
3670  */
3671 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
3672   assert_different_registers(crc, table, tmp);
3673   assert_different_registers(val, table);
3674 
3675   if (crc == val) {                   // Must rotate first to use the unmodified value.
3676     rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
3677                                       // As we use a word (4-byte) instruction, we have to adapt the mask bit positions.
3678     srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
3679   } else {
3680     srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
3681     rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.


3707   BLOCK_COMMENT("update_byte_crc32:");
3708   xorr(val, val, crc);
3709   fold_byte_crc32(crc, val, table, val);
3710 }
3711 
3712 /**
3713  * @param crc   register containing existing CRC (32-bit)
3714  * @param buf   register pointing to input byte buffer (byte*)
3715  * @param len   register containing number of bytes
3716  * @param table register pointing to CRC table
3717  */
3718 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
3719                                            Register data, bool loopAlignment, bool invertCRC) {
3720   assert_different_registers(crc, buf, len, table, data);
3721 
3722   Label L_mainLoop, L_done;
3723   const int mainLoop_stepping  = 1;
3724   const int mainLoop_alignment = loopAlignment ? 32 : 4; // (InputForNewCode > 4 ? InputForNewCode : 32) : 4;
3725 
3726   // Process all bytes in a single-byte loop.
3727   cmpdi(CCR0, len, 0);                           // Anything to do?
3728   mtctr(len);
3729   beq(CCR0, L_done);
3730 
3731   if (invertCRC) {
3732     nand(crc, crc, crc);                         // ~c
3733   }
3734 

3735   align(mainLoop_alignment);
3736   BIND(L_mainLoop);
3737     lbz(data, 0, buf);                           // Byte from buffer, zero-extended.
3738     addi(buf, buf, mainLoop_stepping);           // Advance buffer position.
3739     update_byte_crc32(crc, data, table);
3740     bdnz(L_mainLoop);                            // Iterate.
3741 
3742   if (invertCRC) {
3743     nand(crc, crc, crc);                         // ~c
3744   }
3745 
3746   bind(L_done);
3747 }
3748 
3749 /**
3750  * Emits code to update CRC-32 with a 4-byte value according to constants in table
3751  * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c
3752  */
3753 // A note on the lookup table address(es):
3754 // The lookup table consists of two sets of four columns each.


3930     if (complexThreshold > mainLoop_stepping) {
3931       sub(len, len, tmp2);                       // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
3932     } else {
3933       sub(tmp, len, tmp2);                       // Remaining bytes for main loop.
3934       cmpdi(CCR0, tmp, mainLoop_stepping);
3935       blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
3936       mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
3937     }
3938     update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
3939   }
3940 
3941   srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
3942   andi(len, len, mainLoop_stepping-1);           // remaining bytes for tailLoop
3943   mtctr(tmp2);
3944 
3945 #ifdef VM_LITTLE_ENDIAN
3946   Register crc_rv = crc;
3947 #else
3948   Register crc_rv = tmp;                         // Load_reverse needs separate registers to work on.
3949                                                  // Occupies tmp, but frees up crc.
3950   load_reverse_32(crc_rv, crc);                  // Revert byte order because we are dealing with big-endian data.
3951   tmp = crc;
3952 #endif
3953 
3954   int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
3955 
3956   align(mainLoop_alignment);                     // Octoword-aligned loop address. Shows 2% improvement.
3957   BIND(L_mainLoop);
3958     update_1word_crc32(crc_rv, buf, table, 0, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
3959     bdnz(L_mainLoop);
3960 
3961 #ifndef VM_LITTLE_ENDIAN
3962   load_reverse_32(crc, crc_rv);                  // Revert byte order because we are dealing with big-endian data.
3963   tmp = crc_rv;                                  // Tmp uses its original register again.
3964 #endif
3965 
3966   // Restore original table address for tailLoop.
3967   if (reconstructTableOffset != 0) {
3968     addi(table, table, -reconstructTableOffset);
3969   }
3970 


   1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2016 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


3639   const int ix0 = 3*(4*CRC32_COLUMN_SIZE);
3640   const int ix1 = 2*(4*CRC32_COLUMN_SIZE);
3641   const int ix2 = 1*(4*CRC32_COLUMN_SIZE);
3642   const int ix3 = 0*(4*CRC32_COLUMN_SIZE);
3643 #else
3644   // This is what we implement (the DOBIG4 part):
3645   // =========================================================================
3646   // #define DOBIG4 c ^= *++buf4; \
3647   //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
3648   //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
3649   // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
3650   // =========================================================================
3651   const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
3652   const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
3653   const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
3654   const int ix3 = 7*(4*CRC32_COLUMN_SIZE);
3655 #endif
3656   assert_different_registers(table, tc0, tc1, tc2);
3657   assert(table == tc3, "must be!");
3658 
3659   addi(tc0, table, ix0);
3660   addi(tc1, table, ix1);
3661   addi(tc2, table, ix2);
3662   if (ix3 != 0) addi(tc3, table, ix3);
3663 
3664   return ix3;
3665 }
3666 
3667 /**
3668  * uint32_t crc;
3669  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
3670  */
3671 void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
3672   assert_different_registers(crc, table, tmp);
3673   assert_different_registers(val, table);
3674 
3675   if (crc == val) {                   // Must rotate first to use the unmodified value.
3676     rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
3677                                       // As we use a word (4-byte) instruction, we have to adapt the mask bit positions.
3678     srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
3679   } else {
3680     srwi(crc, crc, 8);                // Unsigned shift, clear leftmost 8 bits.
3681     rlwinm(tmp, val, 2, 24-2, 31-2);  // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.


3707   BLOCK_COMMENT("update_byte_crc32:");
3708   xorr(val, val, crc);
3709   fold_byte_crc32(crc, val, table, val);
3710 }
3711 
3712 /**
3713  * @param crc   register containing existing CRC (32-bit)
3714  * @param buf   register pointing to input byte buffer (byte*)
3715  * @param len   register containing number of bytes
3716  * @param table register pointing to CRC table
3717  */
3718 void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
3719                                            Register data, bool loopAlignment, bool invertCRC) {
3720   assert_different_registers(crc, buf, len, table, data);
3721 
3722   Label L_mainLoop, L_done;
3723   const int mainLoop_stepping  = 1;
3724   const int mainLoop_alignment = loopAlignment ? 32 : 4; // (InputForNewCode > 4 ? InputForNewCode : 32) : 4;
3725 
3726   // Process all bytes in a single-byte loop.
       // 8146613: C2 does no longer guarantee that the upper 32 bits of a register
       // holding an int value are cleared, so enforce 32 bit semantics on len
       // explicitly. The record form (clrldi_) also sets CCR0 for the zero-length
       // early exit below.
3727   clrldi_(len, len, 32);                         // Enforce 32 bit. Anything to do?

3728   beq(CCR0, L_done);
3729 
3730   if (invertCRC) {
3731     nand(crc, crc, crc);                         // ~c
3732   }
3733 
       // Load CTR only after the early exit; len > 0 and 32-bit clean is guaranteed here.
3734   mtctr(len);
3735   align(mainLoop_alignment);
3736   BIND(L_mainLoop);
3737     lbz(data, 0, buf);                           // Byte from buffer, zero-extended.
3738     addi(buf, buf, mainLoop_stepping);           // Advance buffer position.
3739     update_byte_crc32(crc, data, table);
3740     bdnz(L_mainLoop);                            // Iterate.
3741 
3742   if (invertCRC) {
3743     nand(crc, crc, crc);                         // ~c
3744   }
3745 
3746   bind(L_done);
3747 }
3748 
3749 /**
3750  * Emits code to update CRC-32 with a 4-byte value according to constants in table
3751  * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c
3752  */
3753 // A note on the lookup table address(es):
3754 // The lookup table consists of two sets of four columns each.


3930     if (complexThreshold > mainLoop_stepping) {
3931       sub(len, len, tmp2);                       // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
3932     } else {
3933       sub(tmp, len, tmp2);                       // Remaining bytes for main loop.
3934       cmpdi(CCR0, tmp, mainLoop_stepping);
3935       blt(CCR0, L_tail);                         // For less than one mainloop_stepping left, do only tail processing
3936       mr(len, tmp);                              // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
3937     }
3938     update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
3939   }
3940 
3941   srdi(tmp2, len, log_stepping);                 // #iterations for mainLoop
3942   andi(len, len, mainLoop_stepping-1);           // remaining bytes for tailLoop
3943   mtctr(tmp2);
3944 
3945 #ifdef VM_LITTLE_ENDIAN
3946   Register crc_rv = crc;
3947 #else
3948   Register crc_rv = tmp;                         // Load_reverse needs separate registers to work on.
3949                                                  // Occupies tmp, but frees up crc.
3950   load_reverse_32(crc_rv, crc);                  // Revert byte order because we are dealing with big-endian data.
3951   tmp = crc;
3952 #endif
3953 
3954   int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
3955 
3956   align(mainLoop_alignment);                     // Octoword-aligned loop address. Shows 2% improvement.
3957   BIND(L_mainLoop);
3958     update_1word_crc32(crc_rv, buf, table, 0, mainLoop_stepping, crc_rv, t1, t2, t3, tc0, tc1, tc2, tc3);
3959     bdnz(L_mainLoop);
3960 
3961 #ifndef VM_LITTLE_ENDIAN
3962   load_reverse_32(crc, crc_rv);                  // Revert byte order because we are dealing with big-endian data.
3963   tmp = crc_rv;                                  // Tmp uses its original register again.
3964 #endif
3965 
3966   // Restore original table address for tailLoop.
3967   if (reconstructTableOffset != 0) {
3968     addi(table, table, -reconstructTableOffset);
3969   }
3970 


< prev index next >