# HG changeset patch # User mdoerr # Date 1466591769 -7200 # Wed Jun 22 12:36:09 2016 +0200 # Node ID ff88bbb136f102a9de6401f1f0d428a4fae173cd # Parent ba5ca8b6e48ff904de806bf3ed3860f0b16e7a70 8159976: PPC64: Add missing intrinsics for sub-word atomics Reviewed-by: simonis diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp --- a/src/cpu/ppc/vm/assembler_ppc.hpp +++ b/src/cpu/ppc/vm/assembler_ppc.hpp @@ -706,9 +706,13 @@ TW_OPCODE = (31u << OPCODE_SHIFT | 4u << 1), // Atomics. + LBARX_OPCODE = (31u << OPCODE_SHIFT | 52u << 1), + LHARX_OPCODE = (31u << OPCODE_SHIFT | 116u << 1), LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1), + STBCX_OPCODE = (31u << OPCODE_SHIFT | 694u << 1), + STHCX_OPCODE = (31u << OPCODE_SHIFT | 726u << 1), STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1), STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1) @@ -1796,13 +1800,19 @@ inline void waitrsv(); // >=Power7 // atomics + inline void lbarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 + inline void lharx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); - inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8 inline bool lxarx_hint_exclusive_access(); + inline void lbarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void lharx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void ldarx( Register d, Register a, Register b, bool 
hint_exclusive_access = false); inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void stbcx_( Register s, Register a, Register b); + inline void sthcx_( Register s, Register a, Register b); inline void stwcx_( Register s, Register a, Register b); inline void stdcx_( Register s, Register a, Register b); inline void stqcx_( Register s, Register a, Register b); @@ -2169,12 +2179,18 @@ inline void dcbtstct(Register s2, int ct); // Atomics: use ra0mem to disallow R0 as base. + inline void lbarx_unchecked(Register d, Register b, int eh1); + inline void lharx_unchecked(Register d, Register b, int eh1); inline void lwarx_unchecked(Register d, Register b, int eh1); inline void ldarx_unchecked(Register d, Register b, int eh1); inline void lqarx_unchecked(Register d, Register b, int eh1); + inline void lbarx( Register d, Register b, bool hint_exclusive_access); + inline void lharx( Register d, Register b, bool hint_exclusive_access); inline void lwarx( Register d, Register b, bool hint_exclusive_access); inline void ldarx( Register d, Register b, bool hint_exclusive_access); inline void lqarx( Register d, Register b, bool hint_exclusive_access); + inline void stbcx_(Register s, Register b); + inline void sthcx_(Register s, Register b); inline void stwcx_(Register s, Register b); inline void stdcx_(Register s, Register b); inline void stqcx_(Register s, Register b); diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp --- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp @@ -594,13 +594,19 @@ // atomics // Use ra0mem to disallow R0 as base. 
+inline void Assembler::lbarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LBARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline void Assembler::lharx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LHARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } +inline void Assembler::lbarx( Register d, Register a, Register b, bool hint_exclusive_access) { lbarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lharx( Register d, Register a, Register b, bool hint_exclusive_access) { lharx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); } inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::stbcx_(Register s, Register a, Register b) { emit_int32( STBCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } +inline void Assembler::sthcx_(Register s, Register a, Register b) { emit_int32( STHCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } @@ -933,12 +939,18 @@ inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); } // ra0 version +inline void Assembler::lbarx_unchecked(Register d, Register b, int eh1) { emit_int32( LBARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lharx_unchecked(Register d, Register b, int eh1) { emit_int32( LHARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lbarx( Register d, Register b, bool hint_exclusive_access){ lbarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); } +inline void Assembler::lharx( Register d, Register b, bool hint_exclusive_access){ lharx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::stbcx_(Register s, Register b) { emit_int32( STBCX_OPCODE | rs(s) | rb(b) | rc(1)); } +inline void Assembler::sthcx_(Register s, Register b) { emit_int32( STHCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); } diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.cpp b/src/cpu/ppc/vm/macroAssembler_ppc.cpp --- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp +++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp @@ -1422,42 +1422,157 @@ bind(no_reserved_zone_enabling); } -// CmpxchgX sets condition register to cmpX(current, compare). 
-void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value, - Register compare_value, Register exchange_value, - Register addr_base, int semantics, bool cmpxchgx_hint, - Register int_flag_success, bool contention_hint, bool weak) { +void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base, + bool cmpxchgx_hint) { Label retry; - Label failed; - Label done; - - // Save one branch if result is returned via register and - // result register is different from the other ones. - bool use_result_reg = (int_flag_success != noreg); - bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value && - int_flag_success != exchange_value && int_flag_success != addr_base); - assert(!weak || flag == CCR0, "weak only supported with CCR0"); - - if (use_result_reg && preset_result_reg) { - li(int_flag_success, 0); // preset (assume cas failed) + bind(retry); + ldarx(dest_current_value, addr_base, cmpxchgx_hint); + stdcx_(exchange_value, addr_base); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. } - - // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). - if (contention_hint) { // Don't try to reserve if cmp fails. - lwz(dest_current_value, 0, addr_base); - cmpw(flag, dest_current_value, compare_value); - bne(flag, failed); +} + +void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base, + Register tmp, bool cmpxchgx_hint) { + Label retry; + bind(retry); + ldarx(dest_current_value, addr_base, cmpxchgx_hint); + add(tmp, dest_current_value, inc_value); + stdcx_(tmp, addr_base); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. 
} - - // release/fence semantics - if (semantics & MemBarRel) { - release(); +} + +// Word/sub-word atomic helper functions + +// Temps and addr_base are killed if size < 4 and processor does not support respective instructions. +// Atomic add always kills tmp1. +void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, Register tmp3, + bool cmpxchgx_hint, bool is_add, int size) { + int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8. + + Label retry; + Register shift_amount = noreg, + val32 = dest_current_value, + modval = is_add ? tmp1 : exchange_value; + + if (instruction_type != size) { + assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base); + modval = tmp1; + shift_amount = tmp2; + val32 = tmp3; + // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. +#ifdef VM_LITTLE_ENDIAN + rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; + clrrdi(addr_base, addr_base, 2); +#else + xori(shift_amount, addr_base, (size == 1) ? 3 : 2); + clrrdi(addr_base, addr_base, 2); + rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; +#endif } // atomic emulation loop bind(retry); - lwarx(dest_current_value, addr_base, cmpxchgx_hint); + switch (instruction_type) { + case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; + case 2: lharx(val32, addr_base, cmpxchgx_hint); break; + case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; + default: ShouldNotReachHere(); + } + + if (instruction_type != size) { + srw(dest_current_value, val32, shift_amount); + } + + if (is_add) { add(modval, dest_current_value, exchange_value); } + + if (instruction_type != size) { + // Transform exchange value such that the replacement can be done by one xor instruction + xorr(modval, dest_current_value, is_add ? 
modval : exchange_value); + clrldi(modval, modval, (size == 1) ? 56 : 48); + slw(modval, modval, shift_amount); + xorr(modval, val32, modval); + } + + switch (instruction_type) { + case 4: stwcx_(modval, addr_base); break; + case 2: sthcx_(modval, addr_base); break; + case 1: stbcx_(modval, addr_base); break; + default: ShouldNotReachHere(); + } + + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. + } + + if (size == 1) { + extsb(dest_current_value, dest_current_value); + } else if (size == 2) { + extsh(dest_current_value, dest_current_value); + }; +} + +// Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions. +void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, + Register compare_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, + Label &retry, Label &failed, bool cmpxchgx_hint, int size) { + int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8. + + Register shift_amount = noreg, + val32 = dest_current_value, + modval = exchange_value; + + if (instruction_type != size) { + assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base); + shift_amount = tmp1; + val32 = tmp2; + modval = tmp2; + // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. +#ifdef VM_LITTLE_ENDIAN + rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; + clrrdi(addr_base, addr_base, 2); +#else + xori(shift_amount, addr_base, (size == 1) ? 3 : 2); + clrrdi(addr_base, addr_base, 2); + rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; +#endif + // Transform exchange value such that the replacement can be done by one xor instruction. 
+ xorr(exchange_value, compare_value, exchange_value); + clrldi(exchange_value, exchange_value, (size == 1) ? 56 : 48); + slw(exchange_value, exchange_value, shift_amount); + } + + // atomic emulation loop + bind(retry); + + switch (instruction_type) { + case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; + case 2: lharx(val32, addr_base, cmpxchgx_hint); break; + case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; + default: ShouldNotReachHere(); + } + + if (instruction_type != size) { + srw(dest_current_value, val32, shift_amount); + } + if (size == 1) { + extsb(dest_current_value, dest_current_value); + } else if (size == 2) { + extsh(dest_current_value, dest_current_value); + }; + cmpw(flag, dest_current_value, compare_value); if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(flag, failed); @@ -1467,7 +1582,60 @@ // branch to done => (flag == ne), (dest_current_value != compare_value) // fall through => (flag == eq), (dest_current_value == compare_value) - stwcx_(exchange_value, addr_base); + if (instruction_type != size) { + xorr(modval, val32, exchange_value); + } + + switch (instruction_type) { + case 4: stwcx_(modval, addr_base); break; + case 2: sthcx_(modval, addr_base); break; + case 1: stbcx_(modval, addr_base); break; + default: ShouldNotReachHere(); + } +} + +// CmpxchgX sets condition register to cmpX(current, compare). +void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value, + Register compare_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, + int semantics, bool cmpxchgx_hint, + Register int_flag_success, bool contention_hint, bool weak, int size) { + Label retry; + Label failed; + Label done; + + // Save one branch if result is returned via register and + // result register is different from the other ones. 
+ bool use_result_reg = (int_flag_success != noreg); + bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value && + int_flag_success != exchange_value && int_flag_success != addr_base && + int_flag_success != tmp1 && int_flag_success != tmp2); + assert(!weak || flag == CCR0, "weak only supported with CCR0"); + assert(size == 1 || size == 2 || size == 4, "unsupported"); + + if (use_result_reg && preset_result_reg) { + li(int_flag_success, 0); // preset (assume cas failed) + } + + // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). + if (contention_hint) { // Don't try to reserve if cmp fails. + switch (size) { + case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break; + case 2: lha(dest_current_value, 0, addr_base); break; + case 4: lwz(dest_current_value, 0, addr_base); break; + default: ShouldNotReachHere(); + } + cmpw(flag, dest_current_value, compare_value); + bne(flag, failed); + } + + // release/fence semantics + if (semantics & MemBarRel) { + release(); + } + + cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, + retry, failed, cmpxchgx_hint, size); if (!weak || use_result_reg) { if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0. @@ -3751,454 +3919,6 @@ bind(Ldone); } - -// Intrinsics for non-CompactStrings - -// Search for a single jchar in an jchar[]. -// -// Assumes that result differs from all other registers. -// -// 'haystack' is the addresses of a jchar-array. -// 'needle' is either the character to search for or R0. -// 'needleChar' is the character to search for if 'needle' == R0.. -// 'haycnt' is the length of the haystack. We assume 'haycnt' >=1. -// -// Preserves haystack, haycnt, needle and kills all other registers. -// -// If needle == R0, we search for the constant needleChar. 
-void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt, - Register needle, jchar needleChar, - Register tmp1, Register tmp2) { - - assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2); - - Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End; - Register addr = tmp1, - ch1 = tmp2, - ch2 = R0; - -//3: - dcbtct(haystack, 0x00); // Indicate R/O access to haystack. - - srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR). - mr(addr, haystack); - beq(CCR0, L_FinalCheck); - mtctr(tmp2); // Move to count register. -//8: - bind(L_InnerLoop); // Main work horse (2x unrolled search loop). - lhz(ch1, 0, addr); // Load characters from haystack. - lhz(ch2, 2, addr); - (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar); - (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar); - beq(CCR0, L_Found1); // Did we find the needle? - beq(CCR1, L_Found2); - addi(addr, addr, 4); - bdnz(L_InnerLoop); -//16: - bind(L_FinalCheck); - andi_(R0, haycnt, 1); - beq(CCR0, L_NotFound); - lhz(ch1, 0, addr); // One position left at which we have to compare. - (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar); - beq(CCR1, L_Found3); -//21: - bind(L_NotFound); - li(result, -1); // Not found. - b(L_End); - - bind(L_Found2); - addi(addr, addr, 2); -//24: - bind(L_Found1); - bind(L_Found3); // Return index ... - subf(addr, haystack, addr); // relative to haystack, - srdi(result, addr, 1); // in characters. - bind(L_End); -} - - -// Implementation of IndexOf for jchar arrays. -// -// The length of haystack and needle are not constant, i.e. passed in a register. -// -// Preserves registers haystack, needle. -// Kills registers haycnt, needlecnt. -// Assumes that result differs from all other registers. -// Haystack, needle are the addresses of jchar-arrays. -// Haycnt, needlecnt are the lengths of them, respectively. 
-// -// Needlecntval must be zero or 15-bit unsigned immediate and > 1. -void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, - Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, - Register tmp1, Register tmp2, Register tmp3, Register tmp4) { - - // Ensure 0=2, bail out otherwise. - // ************************************************************************************************** - -//1 (variable) or 3 (const): - dcbtct(needle, 0x00); // Indicate R/O access to str1. - dcbtct(haystack, 0x00); // Indicate R/O access to str2. - - // Compute last haystack addr to use if no match gets found. - if (needlecntval == 0) { // variable needlecnt -//3: - subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt. - addi(addr, haystack, -2); // Accesses use pre-increment. - cmpwi(CCR6, needlecnt, 2); - blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately. - slwi(ch1, ch1, 1); // Scale to number of bytes. - lwz(n_start, 0, needle); // Load first 2 characters of needle. - add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). - addi(needlecnt, needlecnt, -2); // Rest of needle. - } else { // constant needlecnt - guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately"); - assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate"); -//5: - addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt. - lwz(n_start, 0, needle); // Load first 2 characters of needle. - addi(addr, haystack, -2); // Accesses use pre-increment. - slwi(ch1, ch1, 1); // Scale to number of bytes. - add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). - li(needlecnt, needlecntval-2); // Rest of needle. - } - - // Main Loop (now we have at least 3 characters). 
-//11: - Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3; - bind(L_OuterLoop); // Search for 1st 2 characters. - Register addr_diff = tmp4; - subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check. - addi(addr, addr, 2); // This is the new address we want to use for comparing. - srdi_(ch2, addr_diff, 2); - beq(CCR0, L_FinalCheck); // 2 characters left? - mtctr(ch2); // addr_diff/4 -//16: - bind(L_InnerLoop); // Main work horse (2x unrolled search loop) - lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment). - lwz(ch2, 2, addr); - cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop). - cmpw(CCR1, ch2, n_start); - beq(CCR0, L_Comp1); // Did we find the needle start? - beq(CCR1, L_Comp2); - addi(addr, addr, 4); - bdnz(L_InnerLoop); -//24: - bind(L_FinalCheck); - rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1. - beq(CCR0, L_NotFound); - lwz(ch1, 0, addr); // One position left at which we have to compare. - cmpw(CCR1, ch1, n_start); - beq(CCR1, L_Comp3); -//29: - bind(L_NotFound); - li(result, -1); // not found - b(L_End); - - - // ************************************************************************************************** - // Special Case: unfortunately, the variable needle case can be called with needlecnt<2 - // ************************************************************************************************** -//31: - if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size. - int nopcnt = 5; - if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below). - if (needlecntval == 0) { // We have to handle these cases separately. 
- Label L_OneCharLoop; - bind(L_TooShort); - mtctr(haycnt); - lhz(n_start, 0, needle); // First character of needle - bind(L_OneCharLoop); - lhzu(ch1, 2, addr); - cmpw(CCR1, ch1, n_start); - beq(CCR1, L_Found); // Did we find the one character needle? - bdnz(L_OneCharLoop); - li(result, -1); // Not found. - b(L_End); - } // 8 instructions, so no impact on alignment. - for (int x = 0; x < nopcnt; ++x) nop(); - } - - // ************************************************************************************************** - // Regular Case Part II: compare rest of needle (first 2 characters have been compared already) - // ************************************************************************************************** - - // Compare the rest -//36 if needlecntval==0, else 37: - bind(L_Comp2); - addi(addr, addr, 2); // First comparison has failed, 2nd one hit. - bind(L_Comp1); // Addr points to possible needle start. - bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here. - if (needlecntval != 2) { // Const needlecnt==2? - if (needlecntval != 3) { - if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2? - Register ind_reg = tmp4; - li(ind_reg, 2*2); // First 2 characters are already compared, use index 2. - mtctr(needlecnt); // Decremented by 2, still > 0. -//40: - Label L_CompLoop; - bind(L_CompLoop); - lhzx(ch2, needle, ind_reg); - lhzx(ch1, addr, ind_reg); - cmpw(CCR1, ch1, ch2); - bne(CCR1, L_OuterLoop); - addi(ind_reg, ind_reg, 2); - bdnz(L_CompLoop); - } else { // No loop required if there's only one needle character left. - lhz(ch2, 2*2, needle); - lhz(ch1, 2*2, addr); - cmpw(CCR1, ch1, ch2); - bne(CCR1, L_OuterLoop); - } - } - // Return index ... -//46: - bind(L_Found); - subf(addr, haystack, addr); // relative to haystack, ... - srdi(result, addr, 1); // in characters. -//48: - bind(L_End); -} - -// Implementation of Compare for jchar arrays. -// -// Kills the registers str1, str2, cnt1, cnt2. 
-// Kills cr0, ctr. -// Assumes that result differes from the input registers. -void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg, - Register result_reg, Register tmp_reg) { - assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg); - - Label Ldone, Lslow_case, Lslow_loop, Lfast_loop; - Register cnt_diff = R0, - limit_reg = cnt1_reg, - chr1_reg = result_reg, - chr2_reg = cnt2_reg, - addr_diff = str2_reg; - - // 'cnt_reg' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array - - // Offset 0 should be 32 byte aligned. -//-6: - srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING); - srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING); -//-4: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. - dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. -//-2: - // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters). - subf(result_reg, cnt2_reg, cnt1_reg); // difference between cnt1/2 - subf_(addr_diff, str1_reg, str2_reg); // alias? 
- beq(CCR0, Ldone); // return cnt difference if both ones are identical - srawi(limit_reg, result_reg, 31); // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow) - mr(cnt_diff, result_reg); - andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt14 characters for fast loop - andi(limit_reg, tmp_reg, 4-1); // remaining characters - - // Adapt str1_reg str2_reg for the first loop iteration - mtctr(chr2_reg); // (min(cnt1, cnt2)-1)/4 - addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop -//16: - // Compare the rest of the characters - bind(Lfast_loop); - ld(chr1_reg, 0, str1_reg); - ldx(chr2_reg, str1_reg, addr_diff); - cmpd(CCR0, chr2_reg, chr1_reg); - bne(CCR0, Lslow_case); // return chr1_reg - addi(str1_reg, str1_reg, 4*2); - bdnz(Lfast_loop); - addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing -//23: - bind(Lslow_case); - mtctr(limit_reg); -//24: - bind(Lslow_loop); - lhz(chr1_reg, 0, str1_reg); - lhzx(chr2_reg, str1_reg, addr_diff); - subf_(result_reg, chr2_reg, chr1_reg); - bne(CCR0, Ldone); // return chr1_reg - addi(str1_reg, str1_reg, 1*2); - bdnz(Lslow_loop); -//30: - // If strings are equal up to min length, return the length difference. - mr(result_reg, cnt_diff); - nop(); // alignment -//32: - // Otherwise, return the difference between the first mismatched chars. - bind(Ldone); -} - - -// Compare char[] arrays. -// -// str1_reg USE only -// str2_reg USE only -// cnt_reg USE_DEF, due to tmp reg shortage -// result_reg DEF only, might compromise USE only registers -void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg, - Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg, - Register tmp5_reg) { - - // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. 
- assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); - assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); - - // Offset 0 should be 32 byte aligned. - Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false; - Register index_reg = tmp5_reg; - Register cbc_iter = tmp4_reg; - - // 'cnt_reg' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array - -//-1: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. - dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. -//1: - // cbc_iter: remaining characters after the '4 java characters per iteration' loop. - rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING - li(index_reg, 0); // init - li(result_reg, 0); // assume false - // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop). - srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4) - - cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter==0) - beq(CCR0, Linit_cbc); // too short - mtctr(tmp2_reg); -//8: - bind(Lloop); - ldx(tmp1_reg, str1_reg, index_reg); - ldx(tmp2_reg, str2_reg, index_reg); - cmpd(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. - addi(index_reg, index_reg, 4*sizeof(jchar)); - bdnz(Lloop); -//14: - bind(Linit_cbc); - beq(CCR1, Ldone_true); - mtctr(cbc_iter); -//16: - bind(Lcbc); - lhzx(tmp1_reg, str1_reg, index_reg); - lhzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. 
- addi(index_reg, index_reg, 1*sizeof(jchar)); - bdnz(Lcbc); - nop(); - bind(Ldone_true); - li(result_reg, 1); -//24: - bind(Ldone_false); -} - - -void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg, - Register tmp1_reg, Register tmp2_reg) { - // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. - assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg); - assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg); - assert(sizeof(jchar) == 2, "must be"); - assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate"); - - // 'cntval' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings - - Label Ldone_false; - - if (cntval < 16) { // short case - if (cntval != 0) li(result_reg, 0); // assume false - - const int num_bytes = cntval*sizeof(jchar); - int index = 0; - for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) { - ld(tmp1_reg, index, str1_reg); - ld(tmp2_reg, index, str2_reg); - cmpd(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - } - if (cntval & 2) { - lwz(tmp1_reg, index, str1_reg); - lwz(tmp2_reg, index, str2_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - index += 4; - } - if (cntval & 1) { - lhz(tmp1_reg, index, str1_reg); - lhz(tmp2_reg, index, str2_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - } - // fallthrough: true - } else { - Label Lloop; - Register index_reg = tmp1_reg; - const int loopcnt = cntval/4; - assert(loopcnt > 0, "must be"); - // Offset 0 should be 32 byte aligned. - //2: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. 
- dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. - li(tmp2_reg, loopcnt); - li(index_reg, 0); // init - li(result_reg, 0); // assume false - mtctr(tmp2_reg); - //8: - bind(Lloop); - ldx(R0, str1_reg, index_reg); - ldx(tmp2_reg, str2_reg, index_reg); - cmpd(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. - addi(index_reg, index_reg, 4*sizeof(jchar)); - bdnz(Lloop); - //14: - if (cntval & 2) { - lwzx(R0, str1_reg, index_reg); - lwzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); - if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar)); - } - if (cntval & 1) { - lhzx(R0, str1_reg, index_reg); - lhzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); - } - // fallthru: true - } - li(result_reg, 1); - bind(Ldone_false); -} - #endif // Compiler2 // Helpers for Intrinsic Emitters diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.hpp --- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp +++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp @@ -431,10 +431,77 @@ MemBarAcq = 2, MemBarFenceAfter = 4 // use powers of 2 }; + private: + // Helper functions for word/sub-word atomics. 
+ void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, Register tmp3, + bool cmpxchgx_hint, bool is_add, int size); + void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, + Register compare_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, + Label &retry, Label &failed, bool cmpxchgx_hint, int size); + void cmpxchg_generic(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, + Register tmp1, Register tmp2, + int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size); + public: + // Temps and addr_base are killed if processor does not support Power 8 instructions. + void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base, + Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1); + } + // Temps and addr_base are killed if processor does not support Power 8 instructions. + void getandseth(Register dest_current_value, Register exchange_value, Register addr_base, + Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2); + } + void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base, + bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4); + } + void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base, + bool cmpxchgx_hint); + // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). 
+ void getandaddb(Register dest_current_value, Register inc_value, Register addr_base, + Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1); + } + // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). + void getandaddh(Register dest_current_value, Register inc_value, Register addr_base, + Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2); + } + void getandaddw(Register dest_current_value, Register inc_value, Register addr_base, + Register tmp1, bool cmpxchgx_hint) { + atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4); + } + void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base, + Register tmp, bool cmpxchgx_hint); + // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. + // compare_value must be at least 32 bit sign extended. + void cmpxchgb(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, + Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, + Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { + cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, + semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1); + } + // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. + // compare_value must be at least 32 bit sign extended. 
+ void cmpxchgh(ConditionRegister flag, + Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, + Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, + Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { + cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, + semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2); + } void cmpxchgw(ConditionRegister flag, Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, int semantics, bool cmpxchgx_hint = false, - Register int_flag_success = noreg, bool contention_hint = false, bool weak = false); + Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { + cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg, + semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4); + } void cmpxchgd(ConditionRegister flag, Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, Register addr_base, int semantics, bool cmpxchgx_hint = false, @@ -717,23 +784,6 @@ Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte); void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2); - - // Intrinsics for non-CompactStrings - // Needle of length 1. - void string_indexof_1(Register result, Register haystack, Register haycnt, - Register needle, jchar needleChar, - Register tmp1, Register tmp2); - // General indexof, eventually with constant needle length. 
- void string_indexof(Register result, Register haystack, Register haycnt, - Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, - Register tmp1, Register tmp2, Register tmp3, Register tmp4); - void string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg, - Register result_reg, Register tmp_reg); - void char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg, - Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg, - Register tmp5_reg); - void char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg, - Register tmp1_reg, Register tmp2_reg); #endif // Emitters for BigInteger.multiplyToLen intrinsic. diff --git a/src/cpu/ppc/vm/ppc.ad b/src/cpu/ppc/vm/ppc.ad --- a/src/cpu/ppc/vm/ppc.ad +++ b/src/cpu/ppc/vm/ppc.ad @@ -965,41 +965,9 @@ // is the number of bytes (not instructions) which will be inserted before // the instruction. The padding must match the size of a NOP instruction. 
-int string_indexOf_imm1_charNode::compute_padding(int current_offset) const { - return (3*4-current_offset)&31; // see MacroAssembler::string_indexof_1 -} - -int string_indexOf_imm1Node::compute_padding(int current_offset) const { - return (3*4-current_offset)&31; // see MacroAssembler::string_indexof_1 -} - -int string_indexOfCharNode::compute_padding(int current_offset) const { - return (3*4-current_offset)&31; // see MacroAssembler::string_indexof_1 -} - -int string_indexOf_immNode::compute_padding(int current_offset) const { - return (3*4-current_offset)&31; // see MacroAssembler::string_indexof(constant needlecount) -} - -int string_indexOfNode::compute_padding(int current_offset) const { - return (1*4-current_offset)&31; // see MacroAssembler::string_indexof(variable needlecount) -} - -int string_compareNode::compute_padding(int current_offset) const { - return (2*4-current_offset)&31; // see MacroAssembler::string_compare -} - -int string_equals_immNode::compute_padding(int current_offset) const { - if (opnd_array(3)->constant() < 16) return 0; // For strlen < 16 no nops because loop completely unrolled - return (2*4-current_offset)&31; // Genral case - see MacroAssembler::char_arrays_equalsImm -} - -int string_equalsNode::compute_padding(int current_offset) const { - return (7*4-current_offset)&31; // see MacroAssembler::char_arrays_equals -} - int inlineCallClearArrayNode::compute_padding(int current_offset) const { - return (2*4-current_offset)&31; // see MacroAssembler::clear_memory_doubleword + int desired_padding = (2*4-current_offset)&31; // see MacroAssembler::clear_memory_doubleword + return (desired_padding <= 3*4) ? desired_padding : 0; } //============================================================================= @@ -3064,121 +3032,6 @@ __ bind(done); %} - // New atomics. 
- enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - - MacroAssembler _masm(&cbuf); - Register Rtmp = R0; - Register Rres = $res$$Register; - Register Rsrc = $src$$Register; - Register Rptr = $mem_ptr$$Register; - bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); - Register Rold = RegCollision ? Rtmp : Rres; - - Label Lretry; - __ bind(Lretry); - __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ add(Rtmp, Rsrc, Rold); - __ stwcx_(Rtmp, Rptr); - if (UseStaticBranchPredictionInCompareAndSwapPPC64) { - __ bne_predict_not_taken(CCR0, Lretry); - } else { - __ bne( CCR0, Lretry); - } - if (RegCollision) __ subf(Rres, Rsrc, Rtmp); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - - enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - - MacroAssembler _masm(&cbuf); - Register Rtmp = R0; - Register Rres = $res$$Register; - Register Rsrc = $src$$Register; - Register Rptr = $mem_ptr$$Register; - bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); - Register Rold = RegCollision ? 
Rtmp : Rres; - - Label Lretry; - __ bind(Lretry); - __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ add(Rtmp, Rsrc, Rold); - __ stdcx_(Rtmp, Rptr); - if (UseStaticBranchPredictionInCompareAndSwapPPC64) { - __ bne_predict_not_taken(CCR0, Lretry); - } else { - __ bne( CCR0, Lretry); - } - if (RegCollision) __ subf(Rres, Rsrc, Rtmp); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - - enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - - MacroAssembler _masm(&cbuf); - Register Rtmp = R0; - Register Rres = $res$$Register; - Register Rsrc = $src$$Register; - Register Rptr = $mem_ptr$$Register; - bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); - Register Rold = RegCollision ? Rtmp : Rres; - - Label Lretry; - __ bind(Lretry); - __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ stwcx_(Rsrc, Rptr); - if (UseStaticBranchPredictionInCompareAndSwapPPC64) { - __ bne_predict_not_taken(CCR0, Lretry); - } else { - __ bne( CCR0, Lretry); - } - if (RegCollision) __ mr(Rres, Rtmp); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - - enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - - MacroAssembler _masm(&cbuf); - Register Rtmp = R0; - Register Rres = $res$$Register; - Register Rsrc = $src$$Register; - Register Rptr = $mem_ptr$$Register; - bool RegCollision = (Rres == Rsrc) || (Rres == Rptr); - Register Rold = RegCollision ? 
Rtmp : Rres; - - Label Lretry; - __ bind(Lretry); - __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); - __ stdcx_(Rsrc, Rptr); - if (UseStaticBranchPredictionInCompareAndSwapPPC64) { - __ bne_predict_not_taken(CCR0, Lretry); - } else { - __ bne( CCR0, Lretry); - } - if (RegCollision) __ mr(Rres, Rtmp); - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - __ isync(); - } else { - __ sync(); - } - %} - // This enc_class is needed so that scheduler gets proper // input mapping for latency computation. enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ @@ -7575,6 +7428,90 @@ // Strong versions: +instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2))); + predicate(VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + $res$$Register, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2))); + predicate(!VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. 
+ ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + $res$$Register, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2))); + predicate(VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + $res$$Register, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2))); + predicate(!VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
+ __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + $res$$Register, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2))); effect(TEMP cr0); @@ -7657,6 +7594,134 @@ // Weak versions: +instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
+ __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + MacroAssembler::MemBarNone, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. 
+ __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. 
+ ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + MacroAssembler::MemBarNone, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + effect(TEMP cr0); + format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. 
+ ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{ + match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); + effect(USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. + __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register, + support_IRIW_for_not_multiple_copy_atomic_cpu ? 
MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter, + MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true); + %} + ins_pipe(pipe_class_default); +%} + instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2))); predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); @@ -7796,6 +7861,158 @@ // CompareAndExchange +instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. 
+ __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); + format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. + __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. 
+ __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); + format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. + __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. + __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); + format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. 
+ __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2); Rres is dest_current_value (the value found in memory), not a 'true'/'false' flag. + __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{ + match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2))); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); + format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %} + // Variable size: instruction count smaller if regs are disjoint. 
+ ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + // CmpxchgX sets CCR0 to cmpX(src1, src2). NOTE(review): the 'true'/'false' wording looks inherited from CompareAndSwap; for CompareAndExchange, Rres presumably receives the value found in memory (confirm against MacroAssembler::cmpxchgh). + __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, true); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2))); predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); @@ -7950,57 +8167,235 @@ // Special RMW +instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ + match(Set res (GetAndAddB mem_ptr src)); + predicate(VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "GetAndAddB $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ + match(Set res (GetAndAddB mem_ptr src)); + predicate(!VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "GetAndAddB $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + if 
(support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ + match(Set res (GetAndAddS mem_ptr src)); + predicate(VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "GetAndAddS $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ + match(Set res (GetAndAddS mem_ptr src)); + predicate(!VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "GetAndAddS $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddI mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndAddI $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. 
- ins_encode( enc_GetAndAddI(res, mem_ptr, src) ); + ins_encode %{ + __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddL mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndAddL $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. - ins_encode( enc_GetAndAddL(res, mem_ptr, src) ); + ins_encode %{ + __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ + match(Set res (GetAndSetB mem_ptr src)); + predicate(VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "GetAndSetB $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register, + noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ + match(Set res (GetAndSetB mem_ptr src)); + predicate(!VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "GetAndSetB $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + if 
(support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ + match(Set res (GetAndSetS mem_ptr src)); + predicate(VM_Version::has_lqarx()); + effect(TEMP_DEF res, TEMP cr0); + format %{ "GetAndSetS $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register, + noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{ + match(Set res (GetAndSetS mem_ptr src)); + predicate(!VM_Version::has_lqarx()); + effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); + format %{ "GetAndSetS $res, $mem_ptr, $src" %} + ins_encode %{ + __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register, + R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetI mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetI $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. 
- ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); + ins_encode %{ + __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetL mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetL $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. - ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); + ins_encode %{ + __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetP mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetP $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. - ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); + ins_encode %{ + __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetN mem_ptr src)); - effect(TEMP cr0); + effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetN $res, $mem_ptr, $src" %} - // Variable size: instruction count smaller if regs are disjoint. 
- ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); + ins_encode %{ + __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} ins_pipe(pipe_class_default); %} @@ -11360,7 +11755,7 @@ effect(USE_KILL cnt, USE_KILL base, KILL ctr); ins_cost(MEMORY_REF_COST); - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. + ins_alignment(4); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. format %{ "ClearArray $cnt, $base" %} ins_encode %{ @@ -11948,283 +12343,6 @@ %} -// String_IndexOf for needle of length 1. -// -// Match needle into immediate operands: no loadConP node needed. Saves one -// register and two instructions over string_indexOf_imm1Node. -// -// Assumes register result differs from all input registers. -// -// Preserves registers haystack, haycnt -// Kills registers tmp1, tmp2 -// Defines registers result -// -// Use dst register classes if register gets killed, as it is the case for tmp registers! -// -// Unfortunately this does not match too often. In many situations the AddP is used -// by several nodes, even several StrIndexOf nodes, breaking the match tree. 
-instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, - immP needleImm, immL offsetImm, immI_1 needlecntImm, - iRegIdst tmp1, iRegIdst tmp2, - flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{ - predicate(SpecialStringIndexOf && !CompactStrings); // type check implicit by parameter type, See Matcher::match_rule_supported - match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm))); - - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr); - - ins_cost(150); - format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]" - "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %} - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - immPOper *needleOper = (immPOper *)$needleImm; - const TypeOopPtr *t = needleOper->type()->isa_oopptr(); - ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char * - jchar chr; - if (java_lang_String::has_coder_field()) { - // New compact strings byte array strings -#ifdef VM_LITTLE_ENDIAN - chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) | - ((jchar)(unsigned char)needle_values->element_value(0).as_byte()); -#else - chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) | - ((jchar)(unsigned char)needle_values->element_value(1).as_byte()); -#endif - } else { - // Old char array strings - chr = needle_values->char_at(0); - } - __ string_indexof_1($result$$Register, - $haystack$$Register, $haycnt$$Register, - R0, chr, - $tmp1$$Register, $tmp2$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String_IndexOf for needle of length 1. -// -// Special case requires less registers and emits less instructions. -// -// Assumes register result differs from all input registers. 
-// -// Preserves registers haystack, haycnt -// Kills registers tmp1, tmp2, needle -// Defines registers result -// -// Use dst register classes if register gets killed, as it is the case for tmp registers! -instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, - rscratch2RegP needle, immI_1 needlecntImm, - iRegIdst tmp1, iRegIdst tmp2, - flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{ - match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm))); - effect(USE_KILL needle, /* TDEF needle, */ TEMP_DEF result, - TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr); - // Required for EA: check if it is still a type_array. - predicate(SpecialStringIndexOf && !CompactStrings && - n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() && - n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array()); - ins_cost(180); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. - - format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]" - " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - Node *ndl = in(operand_index($needle)); // The node that defines needle. 
- ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array(); - guarantee(needle_values, "sanity"); - jchar chr; - if (java_lang_String::has_coder_field()) { - // New compact strings byte array strings -#ifdef VM_LITTLE_ENDIAN - chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) | - ((jchar)(unsigned char)needle_values->element_value(0).as_byte()); -#else - chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) | - ((jchar)(unsigned char)needle_values->element_value(1).as_byte()); -#endif - } else { - // Old char array strings - chr = needle_values->char_at(0); - } - __ string_indexof_1($result$$Register, - $haystack$$Register, $haycnt$$Register, - R0, chr, - $tmp1$$Register, $tmp2$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String_IndexOfChar -// -// Assumes register result differs from all input registers. -// -// Preserves registers haystack, haycnt -// Kills registers tmp1, tmp2 -// Defines registers result -// -// Use dst register classes if register gets killed, as it is the case for tmp registers! -instruct string_indexOfChar(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, - iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2, - flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{ - match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr); - predicate(SpecialStringIndexOf && !CompactStrings); - ins_cost(180); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. 
- - format %{ "String IndexOfChar $haystack[0..$haycnt], $ch" - " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %} - ins_encode %{ - __ string_indexof_1($result$$Register, - $haystack$$Register, $haycnt$$Register, - $ch$$Register, 0 /* this is not used if the character is already in a register */, - $tmp1$$Register, $tmp2$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String_IndexOf. -// -// Length of needle as immediate. This saves instruction loading constant needle -// length. -// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle -// completely or do it in vector instruction. This should save registers for -// needlecnt and needle. -// -// Assumes register result differs from all input registers. -// Overwrites haycnt, needlecnt. -// Use dst register classes if register gets killed, as it is the case for tmp registers! -instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, - iRegPsrc needle, uimmI15 needlecntImm, - iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5, - flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{ - match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm))); - effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr); - // Required for EA: check if it is still a type_array. - predicate(SpecialStringIndexOf && !CompactStrings && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() && - n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array()); - ins_cost(250); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. 
- - format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]" - " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - Node *ndl = in(operand_index($needle)); // The node that defines needle. - ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array(); - - __ string_indexof($result$$Register, - $haystack$$Register, $haycnt$$Register, - $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant, - $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// StrIndexOf node. -// -// Assumes register result differs from all input registers. -// Overwrites haycnt, needlecnt. -// Use dst register classes if register gets killed, as it is the case for tmp registers! -instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt, - iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, - flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{ - match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt))); - effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/ - TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr); - predicate(SpecialStringIndexOf && !CompactStrings); // See Matcher::match_rule_supported. - ins_cost(300); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. 
- - format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]" - " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ string_indexof($result$$Register, - $haystack$$Register, $haycnt$$Register, - $needle$$Register, NULL, $needlecnt$$Register, 0, // needlecnt not constant. - $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String equals with immediate. -instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result, - iRegPdst tmp1, iRegPdst tmp2, - flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{ - match(Set result (StrEquals (Binary str1 str2) cntImm)); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, - KILL cr0, KILL cr6, KILL ctr); - predicate(SpecialStringEquals && !CompactStrings); // See Matcher::match_rule_supported. - ins_cost(250); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. - - format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)" - " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant, - $result$$Register, $tmp1$$Register, $tmp2$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String equals. -// Use dst register classes if register gets killed, as it is the case for TEMP operands! 
-instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result, - iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5, - flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{ - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, - KILL cr0, KILL cr1, KILL cr6, KILL ctr); - predicate(SpecialStringEquals && !CompactStrings); // See Matcher::match_rule_supported. - ins_cost(300); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. - - format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result" - " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register, - $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register); - %} - ins_pipe(pipe_class_compare); -%} - -// String compare. -// Char[] pointers are passed in. -// Use dst register classes if register gets killed, as it is the case for TEMP operands! -instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result, - iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{ - predicate(!CompactStrings); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP_DEF result, TEMP tmp, KILL cr0, KILL ctr); - ins_cost(300); - - ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted. 
- - format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result" - " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %} - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register, - $result$$Register, $tmp$$Register); - %} - ins_pipe(pipe_class_compare); -%} - //---------- Min/Max Instructions --------------------------------------------- instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{