
src/hotspot/cpu/sparc/macroAssembler_sparc.cpp

rev 51719 : [mq]: 8210676


2631 // SPARC refworkload performance - specifically jetstream and scimark - is
2632 // extremely sensitive to the size of the code emitted by compiler_lock_object
2633 // and compiler_unlock_object.  Critically, the key factor is code size, not path
2634 // length.  (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
2635 // effect).
2636 
2637 
2638 void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
2639                                           Register Rbox, Register Rscratch,
2640                                           BiasedLockingCounters* counters,
2641                                           bool try_bias) {
2642    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
2643 
2644    verify_oop(Roop);
2645    Label done ;
2646 
2647    if (counters != NULL) {
2648      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
2649    }
2650 
2651    Label Egress ;
2652 
2653    // Aggressively avoid the Store-before-CAS penalty
2654    // Defer the store into box->dhw until after the CAS
2655    Label IsInflated, Recursive ;
2656 
2657 // Anticipate CAS -- Avoid RTS->RTO upgrade
2658 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
2659 
2660    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
2661    // Triage: biased, stack-locked, neutral, inflated
2662 
2663    if (try_bias) {
2664      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2665      // Invariant: if control reaches this point in the emitted stream
2666      // then Rmark has not been modified.
2667    }
2668    andcc(Rmark, 2, G0);
2669    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2670    delayed()->                         // Beware - dangling delay-slot
2671 
2672    // Try stack-lock acquisition.
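
Two notes on the triage above for readers not steeped in SPARC HotSpot conventions: the dangling delayed()-> means that the next instruction emitted (outside this excerpt) is placed in the delay slot of the brx and executes on both paths, and the andcc(Rmark, 2, G0) test keys off the low-order lock bits of the object's mark word. A minimal sketch of that encoding, with names following HotSpot's markOop conventions (illustrative only, not an excerpt from this file):

  #include <cstdint>

  // Low-order lock bits of the mark word:
  //   ..001  neutral / unlocked                                 (unlocked_value      = 1)
  //   ..000  stack-locked, header displaced into the BasicLock  (locked_value        = 0)
  //   ..010  inflated, mark points to an ObjectMonitor          (monitor_value       = 2)
  //   ..101  biased toward a thread                             (biased_lock_pattern = 5)
  static bool mark_is_inflated(intptr_t mark) {
    // Same test as andcc(Rmark, 2, G0) above: bit 1 set means the lock is
    // inflated (the GC-only "marked" pattern 11 also sets it, but a mutator
    // never sees that on this path).
    return (mark & 2) != 0;
  }
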


4154   srlx(crc, 8, crc);
4155   sllx(tmp, 2, tmp);
4156   lduw(table, tmp, tmp);
4157   xor3(tmp, crc, crc);
4158 }
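
The four instructions above are the tail of a byte-at-a-time, table-driven CRC-32 step. Assuming the truncated start of the helper leaves (crc ^ byte) & 0xff in tmp, the C++ equivalent is roughly the following (hypothetical helper name, sketch only):

  #include <cstdint>

  // One byte of a table-driven (Sarwate) CRC-32 update, mirroring the
  // srlx / sllx / lduw / xor3 sequence above.
  static uint32_t crc32_byte_step(uint32_t crc, uint8_t byte, const uint32_t table[256]) {
    uint32_t index = (crc ^ byte) & 0xff;  // assumed to be computed before the excerpt begins
    return table[index] ^ (crc >> 8);      // lduw(table, tmp, tmp); srlx(crc, 8, crc); xor3(tmp, crc, crc)
  }
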
4159 
4160 #define CRC32_TMP_REG_NUM 18
4161 
4162 #define CRC32_CONST_64  0x163cd6124
4163 #define CRC32_CONST_96  0x0ccaa009e
4164 #define CRC32_CONST_160 0x1751997d0
4165 #define CRC32_CONST_480 0x1c6e41596
4166 #define CRC32_CONST_544 0x154442bd4
4167 
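
Judging by their names, the five CRC32_CONST_* values above are precomputed folding multipliers of the form x^N mod P(x) over GF(2), with P(x) the CRC-32 polynomial and N the bit distance (64, 96, 160, 480, 544) each constant folds across; that reading is an inference from the naming, not something this excerpt states. The identity the fold loops in kernel_crc32 rely on is plain polynomial arithmetic: if a message splits as M(x) = A(x)*x^N + B(x), then

  M(x) mod P(x) = ( A(x) * (x^N mod P(x)) + B(x) ) mod P(x)

so a high-order chunk A can be carry-less multiplied by the small constant x^N mod P(x) and xor-ed into a lower chunk without changing the final CRC.
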
4168 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) {
4169 
4170   Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check;
4171   Label L_main_loop_prologue;
4172   Label L_fold_512b, L_fold_512b_loop, L_fold_128b;
4173   Label L_fold_tail, L_fold_tail_loop;
4174   Label L_8byte_fold_loop, L_8byte_fold_check;
4175 
4176   const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3};
4177 
4178   Register const_64  = tmp[CRC32_TMP_REG_NUM-1];
4179   Register const_96  = tmp[CRC32_TMP_REG_NUM-1];
4180   Register const_160 = tmp[CRC32_TMP_REG_NUM-2];
4181   Register const_480 = tmp[CRC32_TMP_REG_NUM-1];
4182   Register const_544 = tmp[CRC32_TMP_REG_NUM-2];
4183 
4184   set(ExternalAddress(StubRoutines::crc_table_addr()), table);
4185 
4186   not1(crc); // ~c
4187   clruwu(crc); // clear upper 32 bits of crc
4188 
4189   // Check if below cutoff, proceed directly to cleanup code
4190   mov(31, G4);
4191   cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
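
In C++ terms, the entry convention and the short-buffer cutoff set up so far look roughly like the sketch below (hypothetical function name; it reuses the crc32_byte_step helper sketched earlier; the final re-inversion is outside this excerpt and is assumed from the usual CRC-32 convention):

  #include <cstddef>
  #include <cstdint>

  uint32_t kernel_crc32_sketch(uint32_t crc, const uint8_t* buf, size_t len,
                               const uint32_t table[256]) {
    crc = ~crc;                // not1(crc): CRC-32 pre-inversion
    // clruwu(crc) has no C++ analogue; it only guarantees the upper 32 bits are zero.
    if (len <= 31) {
      // Below the cutoff the folding machinery is not worth setting up;
      // fall straight through to the byte-at-a-time cleanup loop.
      while (len-- > 0) {
        crc = crc32_byte_step(crc, *buf++, table);
      }
      return ~crc;             // final inversion, assumed (not shown in the excerpt)
    }
    // ... align buf to 8 bytes, 512-bit and 128-bit folding, tail handling ...
    return ~crc;
  }
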
4192 
4193   // Align buffer to 8 byte boundary
4194   mov(8, O5);




2631 // SPARC refworkload performance - specifically jetstream and scimark - is
2632 // extremely sensitive to the size of the code emitted by compiler_lock_object
2633 // and compiler_unlock_object.  Critically, the key factor is code size, not path
2634 // length.  (Simple experiments to pad CLO with unexecuted NOPs demonstrate the
2635 // effect).
2636 
2637 
2638 void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
2639                                           Register Rbox, Register Rscratch,
2640                                           BiasedLockingCounters* counters,
2641                                           bool try_bias) {
2642    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
2643 
2644    verify_oop(Roop);
2645    Label done ;
2646 
2647    if (counters != NULL) {
2648      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
2649    }
2650 


2651    // Aggressively avoid the Store-before-CAS penalty
2652    // Defer the store into box->dhw until after the CAS
2653    Label IsInflated, Recursive ;
2654 
2655 // Anticipate CAS -- Avoid RTS->RTO upgrade
2656 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
2657 
2658    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
2659    // Triage: biased, stack-locked, neutral, inflated
2660 
2661    if (try_bias) {
2662      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
2663      // Invariant: if control reaches this point in the emitted stream
2664      // then Rmark has not been modified.
2665    }
2666    andcc(Rmark, 2, G0);
2667    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
2668    delayed()->                         // Beware - dangling delay-slot
2669 
2670    // Try stack-lock acquisition.


4152   srlx(crc, 8, crc);
4153   sllx(tmp, 2, tmp);
4154   lduw(table, tmp, tmp);
4155   xor3(tmp, crc, crc);
4156 }
4157 
4158 #define CRC32_TMP_REG_NUM 18
4159 
4160 #define CRC32_CONST_64  0x163cd6124
4161 #define CRC32_CONST_96  0x0ccaa009e
4162 #define CRC32_CONST_160 0x1751997d0
4163 #define CRC32_CONST_480 0x1c6e41596
4164 #define CRC32_CONST_544 0x154442bd4
4165 
4166 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) {
4167 
4168   Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check;
4169   Label L_main_loop_prologue;
4170   Label L_fold_512b, L_fold_512b_loop, L_fold_128b;
4171   Label L_fold_tail, L_fold_tail_loop;
4172   Label L_8byte_fold_check;
4173 
4174   const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3};
4175 
4176   Register const_64  = tmp[CRC32_TMP_REG_NUM-1];
4177   Register const_96  = tmp[CRC32_TMP_REG_NUM-1];
4178   Register const_160 = tmp[CRC32_TMP_REG_NUM-2];
4179   Register const_480 = tmp[CRC32_TMP_REG_NUM-1];
4180   Register const_544 = tmp[CRC32_TMP_REG_NUM-2];
4181 
4182   set(ExternalAddress(StubRoutines::crc_table_addr()), table);
4183 
4184   not1(crc); // ~c
4185   clruwu(crc); // clear upper 32 bits of crc
4186 
4187   // Check if below cutoff, proceed directly to cleanup code
4188   mov(31, G4);
4189   cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
4190 
4191   // Align buffer to 8 byte boundary
4192   mov(8, O5);

