< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 11436 : 8159976: PPC64: Add missing intrinsics for sub-word atomics
Reviewed-by: simonis

*** 1420,1475 **** should_not_reach_here(); bind(no_reserved_zone_enabling); } // CmpxchgX sets condition register to cmpX(current, compare). ! void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value, Register compare_value, Register exchange_value, ! Register addr_base, int semantics, bool cmpxchgx_hint, ! Register int_flag_success, bool contention_hint, bool weak) { Label retry; Label failed; Label done; // Save one branch if result is returned via register and // result register is different from the other ones. bool use_result_reg = (int_flag_success != noreg); bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value && ! int_flag_success != exchange_value && int_flag_success != addr_base); assert(!weak || flag == CCR0, "weak only supported with CCR0"); if (use_result_reg && preset_result_reg) { li(int_flag_success, 0); // preset (assume cas failed) } // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails. ! lwz(dest_current_value, 0, addr_base); cmpw(flag, dest_current_value, compare_value); bne(flag, failed); } // release/fence semantics if (semantics & MemBarRel) { release(); } ! // atomic emulation loop ! bind(retry); ! ! lwarx(dest_current_value, addr_base, cmpxchgx_hint); ! cmpw(flag, dest_current_value, compare_value); ! if (UseStaticBranchPredictionInCompareAndSwapPPC64) { ! bne_predict_not_taken(flag, failed); ! } else { ! bne( flag, failed); ! } ! // branch to done => (flag == ne), (dest_current_value != compare_value) ! // fall through => (flag == eq), (dest_current_value == compare_value) ! ! stwcx_(exchange_value, addr_base); if (!weak || use_result_reg) { if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0. } else { bne( CCR0, weak ? failed : retry); // StXcx_ sets CCR0. 
--- 1420,1643 ---- should_not_reach_here(); bind(no_reserved_zone_enabling); } + void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base, + bool cmpxchgx_hint) { + Label retry; + bind(retry); + ldarx(dest_current_value, addr_base, cmpxchgx_hint); + stdcx_(exchange_value, addr_base); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. + } + } + + // getandsetd (above) and getandaddd (below) emit the same ldarx/stdcx_ retry loop on the doubleword at addr_base; + // dest_current_value receives the value loaded by ldarx. getandsetd stores exchange_value unchanged, + // getandaddd stores tmp = dest_current_value + inc_value (tmp is killed). + // NOTE(review): neither function asserts that its registers are distinct; dest_current_value presumably must + // differ from exchange_value / inc_value / addr_base because it is overwritten by the load - confirm with callers. + void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base, + Register tmp, bool cmpxchgx_hint) { + Label retry; + bind(retry); + ldarx(dest_current_value, addr_base, cmpxchgx_hint); + add(tmp, dest_current_value, inc_value); + stdcx_(tmp, addr_base); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. + } + } + + // Word/sub-word atomic helper functions + + // Temps and addr_base are killed if size < 4 and processor does not support respective instructions. + // Atomic add always kills tmp1. + // Emits a load-reserve/store-conditional retry loop on the size-byte value (1, 2 or 4) at addr_base. + // If is_add, the stored value is old value + exchange_value, otherwise it is exchange_value. + // dest_current_value receives the old value, sign-extended for size 1 and 2. + // If size < 4 and VM_Version::has_lqarx() is false, the access is emulated with lwarx/stwcx_ on the enclosing 4-byte aligned word. + void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, Register tmp3, + bool cmpxchgx_hint, bool is_add, int size) { + int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8. + + Label retry; + Register shift_amount = noreg, + val32 = dest_current_value, + modval = is_add ? tmp1 : exchange_value; + + if (instruction_type != size) { + assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base); + modval = tmp1; + shift_amount = tmp2; + val32 = tmp3; + // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. 
+ #ifdef VM_LITTLE_ENDIAN + rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; + clrrdi(addr_base, addr_base, 2); + #else + xori(shift_amount, addr_base, (size == 1) ? 3 : 2); + clrrdi(addr_base, addr_base, 2); + rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; + #endif + } + + // atomic emulation loop + bind(retry); + + switch (instruction_type) { + case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; + case 2: lharx(val32, addr_base, cmpxchgx_hint); break; + case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; + default: ShouldNotReachHere(); + } + + if (instruction_type != size) { + // Emulated sub-word access: shift the addressed byte/short of the loaded word down to the low-order bits. + srw(dest_current_value, val32, shift_amount); + } + + // For add, modval (tmp1) becomes old value + exchange_value. + if (is_add) { add(modval, dest_current_value, exchange_value); } + + if (instruction_type != size) { + // Transform exchange value such that the replacement can be done by one xor instruction + // modval = ((old ^ new) & sub-word mask) << shift_amount; xor-ing that into val32 replaces only the addressed sub-word. + xorr(modval, dest_current_value, is_add ? modval : exchange_value); + clrldi(modval, modval, (size == 1) ? 56 : 48); + slw(modval, modval, shift_amount); + xorr(modval, val32, modval); + } + + switch (instruction_type) { + case 4: stwcx_(modval, addr_base); break; + case 2: sthcx_(modval, addr_base); break; + case 1: stbcx_(modval, addr_base); break; + default: ShouldNotReachHere(); + } + + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0. + } else { + bne( CCR0, retry); // StXcx_ sets CCR0. + } + + // Return the old value sign-extended from size bytes (nothing to do for size 4). + if (size == 1) { + extsb(dest_current_value, dest_current_value); + } else if (size == 2) { + extsh(dest_current_value, dest_current_value); + }; + } + + // Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions. + // Binds 'retry' at the load-reserve, branches to 'failed' if the loaded value differs from compare_value, and ends with the store-conditional; the branch on its CCR0 result is left to the caller. 
+ void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, + Register compare_value, Register exchange_value, + Register addr_base, Register tmp1, Register tmp2, + Label &retry, Label &failed, bool cmpxchgx_hint, int size) { + int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8. + // Without them (instruction_type == 4 != size) the sub-word is accessed via lwarx/stwcx_ on the enclosing 4-byte aligned word. + + Register shift_amount = noreg, + val32 = dest_current_value, + modval = exchange_value; + + if (instruction_type != size) { + assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base); + shift_amount = tmp1; + val32 = tmp2; + modval = tmp2; + // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned. + #ifdef VM_LITTLE_ENDIAN + rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8; + clrrdi(addr_base, addr_base, 2); + #else + xori(shift_amount, addr_base, (size == 1) ? 3 : 2); + clrrdi(addr_base, addr_base, 2); + rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16; + #endif + // Transform exchange value such that the replacement can be done by one xor instruction. + // exchange_value becomes ((compare_value ^ exchange_value) & sub-word mask) << shift_amount; this is emitted once, before the retry label. + xorr(exchange_value, compare_value, exchange_value); + clrldi(exchange_value, exchange_value, (size == 1) ? 
56 : 48); + slw(exchange_value, exchange_value, shift_amount); + } + + // atomic emulation loop + bind(retry); + + switch (instruction_type) { + case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; + case 2: lharx(val32, addr_base, cmpxchgx_hint); break; + case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; + default: ShouldNotReachHere(); + } + + if (instruction_type != size) { + // Emulated sub-word access: shift the addressed byte/short of the loaded word down to the low-order bits. + srw(dest_current_value, val32, shift_amount); + } + // Sign-extend the loaded sub-word before comparing it with compare_value. + if (size == 1) { + extsb(dest_current_value, dest_current_value); + } else if (size == 2) { + extsh(dest_current_value, dest_current_value); + }; + + cmpw(flag, dest_current_value, compare_value); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(flag, failed); + } else { + bne( flag, failed); + } + // branch to failed => (flag == ne), (dest_current_value != compare_value) + // fall through => (flag == eq), (dest_current_value == compare_value) + + if (instruction_type != size) { + // The sub-word in val32 equals compare_value here, so xor with the pre-transformed exchange_value swaps in the new sub-word and keeps the rest of the word. + xorr(modval, val32, exchange_value); + } + + switch (instruction_type) { + case 4: stwcx_(modval, addr_base); break; + case 2: sthcx_(modval, addr_base); break; + case 1: stbcx_(modval, addr_base); break; + default: ShouldNotReachHere(); + } + // The store-conditional result in CCR0 is not tested here. + } + // CmpxchgX sets condition register to cmpX(current, compare). ! void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value, Register compare_value, Register exchange_value, ! Register addr_base, Register tmp1, Register tmp2, ! int semantics, bool cmpxchgx_hint, ! Register int_flag_success, bool contention_hint, bool weak, int size) { Label retry; Label failed; Label done; // Save one branch if result is returned via register and // result register is different from the other ones. bool use_result_reg = (int_flag_success != noreg); bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value && ! int_flag_success != exchange_value && int_flag_success != addr_base && ! 
int_flag_success != tmp1 && int_flag_success != tmp2); assert(!weak || flag == CCR0, "weak only supported with CCR0"); + assert(size == 1 || size == 2 || size == 4, "unsupported"); if (use_result_reg && preset_result_reg) { li(int_flag_success, 0); // preset (assume cas failed) } // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails. ! switch (size) { ! case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break; ! case 2: lha(dest_current_value, 0, addr_base); break; ! case 4: lwz(dest_current_value, 0, addr_base); break; ! default: ShouldNotReachHere(); ! } cmpw(flag, dest_current_value, compare_value); bne(flag, failed); } // release/fence semantics if (semantics & MemBarRel) { release(); } ! cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, ! retry, failed, cmpxchgx_hint, size); if (!weak || use_result_reg) { if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0. } else { bne( CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
*** 3749,4206 **** li(result, 0); bind(Ldone); } - - // Intrinsics for non-CompactStrings - - // Search for a single jchar in an jchar[]. - // - // Assumes that result differs from all other registers. - // - // 'haystack' is the addresses of a jchar-array. - // 'needle' is either the character to search for or R0. - // 'needleChar' is the character to search for if 'needle' == R0.. - // 'haycnt' is the length of the haystack. We assume 'haycnt' >=1. - // - // Preserves haystack, haycnt, needle and kills all other registers. - // - // If needle == R0, we search for the constant needleChar. - void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt, - Register needle, jchar needleChar, - Register tmp1, Register tmp2) { - - assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2); - - Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End; - Register addr = tmp1, - ch1 = tmp2, - ch2 = R0; - - //3: - dcbtct(haystack, 0x00); // Indicate R/O access to haystack. - - srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR). - mr(addr, haystack); - beq(CCR0, L_FinalCheck); - mtctr(tmp2); // Move to count register. - //8: - bind(L_InnerLoop); // Main work horse (2x unrolled search loop). - lhz(ch1, 0, addr); // Load characters from haystack. - lhz(ch2, 2, addr); - (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar); - (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar); - beq(CCR0, L_Found1); // Did we find the needle? - beq(CCR1, L_Found2); - addi(addr, addr, 4); - bdnz(L_InnerLoop); - //16: - bind(L_FinalCheck); - andi_(R0, haycnt, 1); - beq(CCR0, L_NotFound); - lhz(ch1, 0, addr); // One position left at which we have to compare. - (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar); - beq(CCR1, L_Found3); - //21: - bind(L_NotFound); - li(result, -1); // Not found. 
- b(L_End); - - bind(L_Found2); - addi(addr, addr, 2); - //24: - bind(L_Found1); - bind(L_Found3); // Return index ... - subf(addr, haystack, addr); // relative to haystack, - srdi(result, addr, 1); // in characters. - bind(L_End); - } - - - // Implementation of IndexOf for jchar arrays. - // - // The length of haystack and needle are not constant, i.e. passed in a register. - // - // Preserves registers haystack, needle. - // Kills registers haycnt, needlecnt. - // Assumes that result differs from all other registers. - // Haystack, needle are the addresses of jchar-arrays. - // Haycnt, needlecnt are the lengths of them, respectively. - // - // Needlecntval must be zero or 15-bit unsigned immediate and > 1. - void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, - Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, - Register tmp1, Register tmp2, Register tmp3, Register tmp4) { - - // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite! - Label L_TooShort, L_Found, L_NotFound, L_End; - Register last_addr = haycnt, // Kill haycnt at the beginning. - addr = tmp1, - n_start = tmp2, - ch1 = tmp3, - ch2 = R0; - - // ************************************************************************************************** - // Prepare for main loop: optimized for needle count >=2, bail out otherwise. - // ************************************************************************************************** - - //1 (variable) or 3 (const): - dcbtct(needle, 0x00); // Indicate R/O access to str1. - dcbtct(haystack, 0x00); // Indicate R/O access to str2. - - // Compute last haystack addr to use if no match gets found. - if (needlecntval == 0) { // variable needlecnt - //3: - subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt. - addi(addr, haystack, -2); // Accesses use pre-increment. 
- cmpwi(CCR6, needlecnt, 2); - blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately. - slwi(ch1, ch1, 1); // Scale to number of bytes. - lwz(n_start, 0, needle); // Load first 2 characters of needle. - add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). - addi(needlecnt, needlecnt, -2); // Rest of needle. - } else { // constant needlecnt - guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately"); - assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate"); - //5: - addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt. - lwz(n_start, 0, needle); // Load first 2 characters of needle. - addi(addr, haystack, -2); // Accesses use pre-increment. - slwi(ch1, ch1, 1); // Scale to number of bytes. - add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). - li(needlecnt, needlecntval-2); // Rest of needle. - } - - // Main Loop (now we have at least 3 characters). - //11: - Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3; - bind(L_OuterLoop); // Search for 1st 2 characters. - Register addr_diff = tmp4; - subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check. - addi(addr, addr, 2); // This is the new address we want to use for comparing. - srdi_(ch2, addr_diff, 2); - beq(CCR0, L_FinalCheck); // 2 characters left? - mtctr(ch2); // addr_diff/4 - //16: - bind(L_InnerLoop); // Main work horse (2x unrolled search loop) - lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment). - lwz(ch2, 2, addr); - cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop). - cmpw(CCR1, ch2, n_start); - beq(CCR0, L_Comp1); // Did we find the needle start? 
- beq(CCR1, L_Comp2); - addi(addr, addr, 4); - bdnz(L_InnerLoop); - //24: - bind(L_FinalCheck); - rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1. - beq(CCR0, L_NotFound); - lwz(ch1, 0, addr); // One position left at which we have to compare. - cmpw(CCR1, ch1, n_start); - beq(CCR1, L_Comp3); - //29: - bind(L_NotFound); - li(result, -1); // not found - b(L_End); - - - // ************************************************************************************************** - // Special Case: unfortunately, the variable needle case can be called with needlecnt<2 - // ************************************************************************************************** - //31: - if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size. - int nopcnt = 5; - if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below). - if (needlecntval == 0) { // We have to handle these cases separately. - Label L_OneCharLoop; - bind(L_TooShort); - mtctr(haycnt); - lhz(n_start, 0, needle); // First character of needle - bind(L_OneCharLoop); - lhzu(ch1, 2, addr); - cmpw(CCR1, ch1, n_start); - beq(CCR1, L_Found); // Did we find the one character needle? - bdnz(L_OneCharLoop); - li(result, -1); // Not found. - b(L_End); - } // 8 instructions, so no impact on alignment. - for (int x = 0; x < nopcnt; ++x) nop(); - } - - // ************************************************************************************************** - // Regular Case Part II: compare rest of needle (first 2 characters have been compared already) - // ************************************************************************************************** - - // Compare the rest - //36 if needlecntval==0, else 37: - bind(L_Comp2); - addi(addr, addr, 2); // First comparison has failed, 2nd one hit. - bind(L_Comp1); // Addr points to possible needle start. 
- bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here. - if (needlecntval != 2) { // Const needlecnt==2? - if (needlecntval != 3) { - if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2? - Register ind_reg = tmp4; - li(ind_reg, 2*2); // First 2 characters are already compared, use index 2. - mtctr(needlecnt); // Decremented by 2, still > 0. - //40: - Label L_CompLoop; - bind(L_CompLoop); - lhzx(ch2, needle, ind_reg); - lhzx(ch1, addr, ind_reg); - cmpw(CCR1, ch1, ch2); - bne(CCR1, L_OuterLoop); - addi(ind_reg, ind_reg, 2); - bdnz(L_CompLoop); - } else { // No loop required if there's only one needle character left. - lhz(ch2, 2*2, needle); - lhz(ch1, 2*2, addr); - cmpw(CCR1, ch1, ch2); - bne(CCR1, L_OuterLoop); - } - } - // Return index ... - //46: - bind(L_Found); - subf(addr, haystack, addr); // relative to haystack, ... - srdi(result, addr, 1); // in characters. - //48: - bind(L_End); - } - - // Implementation of Compare for jchar arrays. - // - // Kills the registers str1, str2, cnt1, cnt2. - // Kills cr0, ctr. - // Assumes that result differes from the input registers. - void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg, - Register result_reg, Register tmp_reg) { - assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg); - - Label Ldone, Lslow_case, Lslow_loop, Lfast_loop; - Register cnt_diff = R0, - limit_reg = cnt1_reg, - chr1_reg = result_reg, - chr2_reg = cnt2_reg, - addr_diff = str2_reg; - - // 'cnt_reg' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array - - // Offset 0 should be 32 byte aligned. 
- //-6: - srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING); - srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING); - //-4: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. - dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. - //-2: - // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters). - subf(result_reg, cnt2_reg, cnt1_reg); // difference between cnt1/2 - subf_(addr_diff, str1_reg, str2_reg); // alias? - beq(CCR0, Ldone); // return cnt difference if both ones are identical - srawi(limit_reg, result_reg, 31); // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow) - mr(cnt_diff, result_reg); - andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0 - add_(limit_reg, cnt2_reg, limit_reg); // min(cnt1, cnt2)==0? - beq(CCR0, Ldone); // return cnt difference if one has 0 length - - lhz(chr1_reg, 0, str1_reg); // optional: early out if first characters mismatch - lhzx(chr2_reg, str1_reg, addr_diff); // optional: early out if first characters mismatch - addi(tmp_reg, limit_reg, -1); // min(cnt1, cnt2)-1 - subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch - bne(CCR0, Ldone); // optional: early out if first characters mismatch - - // Set loop counter by scaling down tmp_reg - srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4 - ble(CCR0, Lslow_case); // need >4 characters for fast loop - andi(limit_reg, tmp_reg, 4-1); // remaining characters - - // Adapt str1_reg str2_reg for the first loop iteration - mtctr(chr2_reg); // (min(cnt1, cnt2)-1)/4 - addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop - //16: - // Compare the rest of the characters - bind(Lfast_loop); - ld(chr1_reg, 0, str1_reg); - ldx(chr2_reg, str1_reg, addr_diff); - cmpd(CCR0, chr2_reg, chr1_reg); - bne(CCR0, Lslow_case); // return chr1_reg - addi(str1_reg, str1_reg, 4*2); - 
bdnz(Lfast_loop); - addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing - //23: - bind(Lslow_case); - mtctr(limit_reg); - //24: - bind(Lslow_loop); - lhz(chr1_reg, 0, str1_reg); - lhzx(chr2_reg, str1_reg, addr_diff); - subf_(result_reg, chr2_reg, chr1_reg); - bne(CCR0, Ldone); // return chr1_reg - addi(str1_reg, str1_reg, 1*2); - bdnz(Lslow_loop); - //30: - // If strings are equal up to min length, return the length difference. - mr(result_reg, cnt_diff); - nop(); // alignment - //32: - // Otherwise, return the difference between the first mismatched chars. - bind(Ldone); - } - - - // Compare char[] arrays. - // - // str1_reg USE only - // str2_reg USE only - // cnt_reg USE_DEF, due to tmp reg shortage - // result_reg DEF only, might compromise USE only registers - void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg, - Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg, - Register tmp5_reg) { - - // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. - assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); - assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg); - - // Offset 0 should be 32 byte aligned. - Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false; - Register index_reg = tmp5_reg; - Register cbc_iter = tmp4_reg; - - // 'cnt_reg' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array - - //-1: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. - dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. 
- //1: - // cbc_iter: remaining characters after the '4 java characters per iteration' loop. - rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING - li(index_reg, 0); // init - li(result_reg, 0); // assume false - // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop). - srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4) - - cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter==0) - beq(CCR0, Linit_cbc); // too short - mtctr(tmp2_reg); - //8: - bind(Lloop); - ldx(tmp1_reg, str1_reg, index_reg); - ldx(tmp2_reg, str2_reg, index_reg); - cmpd(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. - addi(index_reg, index_reg, 4*sizeof(jchar)); - bdnz(Lloop); - //14: - bind(Linit_cbc); - beq(CCR1, Ldone_true); - mtctr(cbc_iter); - //16: - bind(Lcbc); - lhzx(tmp1_reg, str1_reg, index_reg); - lhzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. - addi(index_reg, index_reg, 1*sizeof(jchar)); - bdnz(Lcbc); - nop(); - bind(Ldone_true); - li(result_reg, 1); - //24: - bind(Ldone_false); - } - - - void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg, - Register tmp1_reg, Register tmp2_reg) { - // Str1 may be the same register as str2 which can occur e.g. after scalar replacement. 
- assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg); - assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg); - assert(sizeof(jchar) == 2, "must be"); - assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate"); - - // 'cntval' contains the number of characters in the string's character array for the - // pre-CompactStrings strings implementation and the number of bytes in the string's - // byte array for the CompactStrings strings implementation. - cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings - - Label Ldone_false; - - if (cntval < 16) { // short case - if (cntval != 0) li(result_reg, 0); // assume false - - const int num_bytes = cntval*sizeof(jchar); - int index = 0; - for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) { - ld(tmp1_reg, index, str1_reg); - ld(tmp2_reg, index, str2_reg); - cmpd(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - } - if (cntval & 2) { - lwz(tmp1_reg, index, str1_reg); - lwz(tmp2_reg, index, str2_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - index += 4; - } - if (cntval & 1) { - lhz(tmp1_reg, index, str1_reg); - lhz(tmp2_reg, index, str2_reg); - cmpw(CCR0, tmp1_reg, tmp2_reg); - bne(CCR0, Ldone_false); - } - // fallthrough: true - } else { - Label Lloop; - Register index_reg = tmp1_reg; - const int loopcnt = cntval/4; - assert(loopcnt > 0, "must be"); - // Offset 0 should be 32 byte aligned. - //2: - dcbtct(str1_reg, 0x00); // Indicate R/O access to str1. - dcbtct(str2_reg, 0x00); // Indicate R/O access to str2. - li(tmp2_reg, loopcnt); - li(index_reg, 0); // init - li(result_reg, 0); // assume false - mtctr(tmp2_reg); - //8: - bind(Lloop); - ldx(R0, str1_reg, index_reg); - ldx(tmp2_reg, str2_reg, index_reg); - cmpd(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); // Unequal char pair found -> done. 
- addi(index_reg, index_reg, 4*sizeof(jchar)); - bdnz(Lloop); - //14: - if (cntval & 2) { - lwzx(R0, str1_reg, index_reg); - lwzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); - if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar)); - } - if (cntval & 1) { - lhzx(R0, str1_reg, index_reg); - lhzx(tmp2_reg, str2_reg, index_reg); - cmpw(CCR0, R0, tmp2_reg); - bne(CCR0, Ldone_false); - } - // fallthru: true - } - li(result_reg, 1); - bind(Ldone_false); - } - #endif // Compiler2 // Helpers for Intrinsic Emitters // // Revert the byte order of a 32bit value in a register --- 3917,3926 ----
< prev index next >