< prev index next >

src/cpu/ppc/vm/macroAssembler_ppc.cpp

Print this page
rev 11436 : 8159976: PPC64: Add missing intrinsics for sub-word atomics
Reviewed-by: simonis

@@ -1420,56 +1420,224 @@
   should_not_reach_here();
 
   bind(no_reserved_zone_enabling);
 }
 
+void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
+                                bool cmpxchgx_hint) {
+  Label retry;
+  bind(retry);
+  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
+  stdcx_(exchange_value, addr_base);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
+  }
+}
+
+void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base,
+                                Register tmp, bool cmpxchgx_hint) {
+  Label retry;
+  bind(retry);
+  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
+  add(tmp, dest_current_value, inc_value);
+  stdcx_(tmp, addr_base);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
+  }
+}
+
+// Word/sub-word atomic helper functions
+
+// Temps and addr_base are killed if size < 4 and processor does not support respective instructions.
+// Atomic add always kills tmp1.
+void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
+                                                   Register addr_base, Register tmp1, Register tmp2, Register tmp3,
+                                                   bool cmpxchgx_hint, bool is_add, int size) {
+  int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8.
+
+  Label retry;
+  Register shift_amount = noreg,
+           val32 = dest_current_value,
+           modval = is_add ? tmp1 : exchange_value;
+
+  if (instruction_type != size) {
+    assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base);
+    modval = tmp1;
+    shift_amount = tmp2;
+    val32 = tmp3;
+    // Need some preperation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
+#ifdef VM_LITTLE_ENDIAN
+    rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
+    clrrdi(addr_base, addr_base, 2);
+#else
+    xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
+    clrrdi(addr_base, addr_base, 2);
+    rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
+#endif
+  }
+
+  // atomic emulation loop
+  bind(retry);
+
+  switch (instruction_type) {
+    case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
+    case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
+    case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (instruction_type != size) {
+    srw(dest_current_value, val32, shift_amount);
+  }
+
+  if (is_add) { add(modval, dest_current_value, exchange_value); }
+
+  if (instruction_type != size) {
+    // Transform exchange value such that the replacement can be done by one xor instruction
+    xorr(modval, dest_current_value, is_add ? modval : exchange_value);
+    clrldi(modval, modval, (size == 1) ? 56 : 48);
+    slw(modval, modval, shift_amount);
+    xorr(modval, val32, modval);
+  }
+
+  switch (instruction_type) {
+    case 4: stwcx_(modval, addr_base); break;
+    case 2: sthcx_(modval, addr_base); break;
+    case 1: stbcx_(modval, addr_base); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
+  }
+
+  if (size == 1) {
+    extsb(dest_current_value, dest_current_value);
+  } else if (size == 2) {
+    extsh(dest_current_value, dest_current_value);
+  };
+}
+
+// Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions.
+void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
+                                       Register compare_value, Register exchange_value,
+                                       Register addr_base, Register tmp1, Register tmp2,
+                                       Label &retry, Label &failed, bool cmpxchgx_hint, int size) {
+  int instruction_type = VM_Version::has_lqarx() ? size : 4; // Sub-word instructions available since Power 8.
+
+  Register shift_amount = noreg,
+           val32 = dest_current_value,
+           modval = exchange_value;
+
+  if (instruction_type != size) {
+    assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base);
+    shift_amount = tmp1;
+    val32 = tmp2;
+    modval = tmp2;
+    // Need some preperation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
+#ifdef VM_LITTLE_ENDIAN
+    rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
+    clrrdi(addr_base, addr_base, 2);
+#else
+    xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
+    clrrdi(addr_base, addr_base, 2);
+    rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
+#endif
+    // Transform exchange value such that the replacement can be done by one xor instruction.
+    xorr(exchange_value, compare_value, exchange_value);
+    clrldi(exchange_value, exchange_value, (size == 1) ? 56 : 48);
+    slw(exchange_value, exchange_value, shift_amount);
+  }
+
+  // atomic emulation loop
+  bind(retry);
+
+  switch (instruction_type) {
+    case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
+    case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
+    case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (instruction_type != size) {
+    srw(dest_current_value, val32, shift_amount);
+  }
+  if (size == 1) {
+    extsb(dest_current_value, dest_current_value);
+  } else if (size == 2) {
+    extsh(dest_current_value, dest_current_value);
+  };
+
+  cmpw(flag, dest_current_value, compare_value);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(flag, failed);
+  } else {
+    bne(                  flag, failed);
+  }
+  // branch to done  => (flag == ne), (dest_current_value != compare_value)
+  // fall through    => (flag == eq), (dest_current_value == compare_value)
+
+  if (instruction_type != size) {
+    xorr(modval, val32, exchange_value);
+  }
+
+  switch (instruction_type) {
+    case 4: stwcx_(modval, addr_base); break;
+    case 2: sthcx_(modval, addr_base); break;
+    case 1: stbcx_(modval, addr_base); break;
+    default: ShouldNotReachHere();
+  }
+}
+
 // CmpxchgX sets condition register to cmpX(current, compare).
-void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
+void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
                               Register compare_value, Register exchange_value,
-                              Register addr_base, int semantics, bool cmpxchgx_hint,
-                              Register int_flag_success, bool contention_hint, bool weak) {
+                                     Register addr_base, Register tmp1, Register tmp2,
+                                     int semantics, bool cmpxchgx_hint,
+                                     Register int_flag_success, bool contention_hint, bool weak, int size) {
   Label retry;
   Label failed;
   Label done;
 
   // Save one branch if result is returned via register and
   // result register is different from the other ones.
   bool use_result_reg    = (int_flag_success != noreg);
   bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
-                            int_flag_success != exchange_value && int_flag_success != addr_base);
+                            int_flag_success != exchange_value && int_flag_success != addr_base &&
+                            int_flag_success != tmp1 && int_flag_success != tmp2);
   assert(!weak || flag == CCR0, "weak only supported with CCR0");
+  assert(size == 1 || size == 2 || size == 4, "unsupported");
 
   if (use_result_reg && preset_result_reg) {
     li(int_flag_success, 0); // preset (assume cas failed)
   }
 
   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
   if (contention_hint) { // Don't try to reserve if cmp fails.
-    lwz(dest_current_value, 0, addr_base);
+    switch (size) {
+      case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break;
+      case 2: lha(dest_current_value, 0, addr_base); break;
+      case 4: lwz(dest_current_value, 0, addr_base); break;
+      default: ShouldNotReachHere();
+    }
     cmpw(flag, dest_current_value, compare_value);
     bne(flag, failed);
   }
 
   // release/fence semantics
   if (semantics & MemBarRel) {
     release();
   }
 
-  // atomic emulation loop
-  bind(retry);
-
-  lwarx(dest_current_value, addr_base, cmpxchgx_hint);
-  cmpw(flag, dest_current_value, compare_value);
-  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-    bne_predict_not_taken(flag, failed);
-  } else {
-    bne(                  flag, failed);
-  }
-  // branch to done  => (flag == ne), (dest_current_value != compare_value)
-  // fall through    => (flag == eq), (dest_current_value == compare_value)
-
-  stwcx_(exchange_value, addr_base);
+  cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
+                    retry, failed, cmpxchgx_hint, size);
   if (!weak || use_result_reg) {
     if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
       bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
     } else {
       bne(                  CCR0, weak ? failed : retry); // StXcx_ sets CCR0.

@@ -3749,458 +3917,10 @@
   li(result, 0);
 
   bind(Ldone);
 }
 
-
-// Intrinsics for non-CompactStrings
-
-// Search for a single jchar in an jchar[].
-//
-// Assumes that result differs from all other registers.
-//
-// 'haystack' is the addresses of a jchar-array.
-// 'needle' is either the character to search for or R0.
-// 'needleChar' is the character to search for if 'needle' == R0..
-// 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
-//
-// Preserves haystack, haycnt, needle and kills all other registers.
-//
-// If needle == R0, we search for the constant needleChar.
-void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
-                                      Register needle, jchar needleChar,
-                                      Register tmp1, Register tmp2) {
-
-  assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
-
-  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
-  Register addr = tmp1,
-           ch1 = tmp2,
-           ch2 = R0;
-
-//3:
-   dcbtct(haystack, 0x00);                        // Indicate R/O access to haystack.
-
-   srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
-   mr(addr, haystack);
-   beq(CCR0, L_FinalCheck);
-   mtctr(tmp2);              // Move to count register.
-//8:
-  bind(L_InnerLoop);             // Main work horse (2x unrolled search loop).
-   lhz(ch1, 0, addr);        // Load characters from haystack.
-   lhz(ch2, 2, addr);
-   (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar);
-   (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar);
-   beq(CCR0, L_Found1);   // Did we find the needle?
-   beq(CCR1, L_Found2);
-   addi(addr, addr, 4);
-   bdnz(L_InnerLoop);
-//16:
-  bind(L_FinalCheck);
-   andi_(R0, haycnt, 1);
-   beq(CCR0, L_NotFound);
-   lhz(ch1, 0, addr);        // One position left at which we have to compare.
-   (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar);
-   beq(CCR1, L_Found3);
-//21:
-  bind(L_NotFound);
-   li(result, -1);           // Not found.
-   b(L_End);
-
-  bind(L_Found2);
-   addi(addr, addr, 2);
-//24:
-  bind(L_Found1);
-  bind(L_Found3);                  // Return index ...
-   subf(addr, haystack, addr); // relative to haystack,
-   srdi(result, addr, 1);      // in characters.
-  bind(L_End);
-}
-
-
-// Implementation of IndexOf for jchar arrays.
-//
-// The length of haystack and needle are not constant, i.e. passed in a register.
-//
-// Preserves registers haystack, needle.
-// Kills registers haycnt, needlecnt.
-// Assumes that result differs from all other registers.
-// Haystack, needle are the addresses of jchar-arrays.
-// Haycnt, needlecnt are the lengths of them, respectively.
-//
-// Needlecntval must be zero or 15-bit unsigned immediate and > 1.
-void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
-                                    Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
-                                    Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
-
-  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
-  Label L_TooShort, L_Found, L_NotFound, L_End;
-  Register last_addr = haycnt, // Kill haycnt at the beginning.
-           addr      = tmp1,
-           n_start   = tmp2,
-           ch1       = tmp3,
-           ch2       = R0;
-
-  // **************************************************************************************************
-  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
-  // **************************************************************************************************
-
-//1 (variable) or 3 (const):
-   dcbtct(needle, 0x00);    // Indicate R/O access to str1.
-   dcbtct(haystack, 0x00);  // Indicate R/O access to str2.
-
-  // Compute last haystack addr to use if no match gets found.
-  if (needlecntval == 0) { // variable needlecnt
-//3:
-   subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
-   addi(addr, haystack, -2);          // Accesses use pre-increment.
-   cmpwi(CCR6, needlecnt, 2);
-   blt(CCR6, L_TooShort);          // Variable needlecnt: handle short needle separately.
-   slwi(ch1, ch1, 1);                 // Scale to number of bytes.
-   lwz(n_start, 0, needle);           // Load first 2 characters of needle.
-   add(last_addr, haystack, ch1);     // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
-   addi(needlecnt, needlecnt, -2);    // Rest of needle.
-  } else { // constant needlecnt
-  guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
-  assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
-//5:
-   addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
-   lwz(n_start, 0, needle);           // Load first 2 characters of needle.
-   addi(addr, haystack, -2);          // Accesses use pre-increment.
-   slwi(ch1, ch1, 1);                 // Scale to number of bytes.
-   add(last_addr, haystack, ch1);     // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
-   li(needlecnt, needlecntval-2);     // Rest of needle.
-  }
-
-  // Main Loop (now we have at least 3 characters).
-//11:
-  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3;
-  bind(L_OuterLoop); // Search for 1st 2 characters.
-  Register addr_diff = tmp4;
-   subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
-   addi(addr, addr, 2);              // This is the new address we want to use for comparing.
-   srdi_(ch2, addr_diff, 2);
-   beq(CCR0, L_FinalCheck);       // 2 characters left?
-   mtctr(ch2);                       // addr_diff/4
-//16:
-  bind(L_InnerLoop);                // Main work horse (2x unrolled search loop)
-   lwz(ch1, 0, addr);           // Load 2 characters of haystack (ignore alignment).
-   lwz(ch2, 2, addr);
-   cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
-   cmpw(CCR1, ch2, n_start);
-   beq(CCR0, L_Comp1);       // Did we find the needle start?
-   beq(CCR1, L_Comp2);
-   addi(addr, addr, 4);
-   bdnz(L_InnerLoop);
-//24:
-  bind(L_FinalCheck);
-   rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1.
-   beq(CCR0, L_NotFound);
-   lwz(ch1, 0, addr);                       // One position left at which we have to compare.
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Comp3);
-//29:
-  bind(L_NotFound);
-   li(result, -1); // not found
-   b(L_End);
-
-
-   // **************************************************************************************************
-   // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
-   // **************************************************************************************************
-//31:
- if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size.
-  int nopcnt = 5;
-  if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below).
-  if (needlecntval == 0) {         // We have to handle these cases separately.
-  Label L_OneCharLoop;
-  bind(L_TooShort);
-   mtctr(haycnt);
-   lhz(n_start, 0, needle);    // First character of needle
-  bind(L_OneCharLoop);
-   lhzu(ch1, 2, addr);
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Found);      // Did we find the one character needle?
-   bdnz(L_OneCharLoop);
-   li(result, -1);             // Not found.
-   b(L_End);
-  } // 8 instructions, so no impact on alignment.
-  for (int x = 0; x < nopcnt; ++x) nop();
- }
-
-  // **************************************************************************************************
-  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
-  // **************************************************************************************************
-
-  // Compare the rest
-//36 if needlecntval==0, else 37:
-  bind(L_Comp2);
-   addi(addr, addr, 2); // First comparison has failed, 2nd one hit.
-  bind(L_Comp1);            // Addr points to possible needle start.
-  bind(L_Comp3);            // Could have created a copy and use a different return address but saving code size here.
-  if (needlecntval != 2) {  // Const needlecnt==2?
-   if (needlecntval != 3) {
-    if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2?
-    Register ind_reg = tmp4;
-    li(ind_reg, 2*2);   // First 2 characters are already compared, use index 2.
-    mtctr(needlecnt);   // Decremented by 2, still > 0.
-//40:
-   Label L_CompLoop;
-   bind(L_CompLoop);
-    lhzx(ch2, needle, ind_reg);
-    lhzx(ch1, addr, ind_reg);
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-    addi(ind_reg, ind_reg, 2);
-    bdnz(L_CompLoop);
-   } else { // No loop required if there's only one needle character left.
-    lhz(ch2, 2*2, needle);
-    lhz(ch1, 2*2, addr);
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-   }
-  }
-  // Return index ...
-//46:
-  bind(L_Found);
-   subf(addr, haystack, addr); // relative to haystack, ...
-   srdi(result, addr, 1);      // in characters.
-//48:
-  bind(L_End);
-}
-
-// Implementation of Compare for jchar arrays.
-//
-// Kills the registers str1, str2, cnt1, cnt2.
-// Kills cr0, ctr.
-// Assumes that result differes from the input registers.
-void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
-                                    Register result_reg, Register tmp_reg) {
-   assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
-
-   Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
-   Register cnt_diff = R0,
-            limit_reg = cnt1_reg,
-            chr1_reg = result_reg,
-            chr2_reg = cnt2_reg,
-            addr_diff = str2_reg;
-
-   // 'cnt_reg' contains the number of characters in the string's character array for the
-   // pre-CompactStrings strings implementation and the number of bytes in the string's
-   // byte array for the CompactStrings strings implementation.
-   const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
-
-   // Offset 0 should be 32 byte aligned.
-//-6:
-    srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING);
-    srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING);
-//-4:
-    dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-    dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-//-2:
-   // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
-    subf(result_reg, cnt2_reg, cnt1_reg);  // difference between cnt1/2
-    subf_(addr_diff, str1_reg, str2_reg);  // alias?
-    beq(CCR0, Ldone);                   // return cnt difference if both ones are identical
-    srawi(limit_reg, result_reg, 31);      // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
-    mr(cnt_diff, result_reg);
-    andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
-    add_(limit_reg, cnt2_reg, limit_reg);  // min(cnt1, cnt2)==0?
-    beq(CCR0, Ldone);                   // return cnt difference if one has 0 length
-
-    lhz(chr1_reg, 0, str1_reg);            // optional: early out if first characters mismatch
-    lhzx(chr2_reg, str1_reg, addr_diff);   // optional: early out if first characters mismatch
-    addi(tmp_reg, limit_reg, -1);          // min(cnt1, cnt2)-1
-    subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
-    bne(CCR0, Ldone);                   // optional: early out if first characters mismatch
-
-   // Set loop counter by scaling down tmp_reg
-    srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4
-    ble(CCR0, Lslow_case);                 // need >4 characters for fast loop
-    andi(limit_reg, tmp_reg, 4-1);            // remaining characters
-
-   // Adapt str1_reg str2_reg for the first loop iteration
-    mtctr(chr2_reg);                 // (min(cnt1, cnt2)-1)/4
-    addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop
-//16:
-   // Compare the rest of the characters
-   bind(Lfast_loop);
-    ld(chr1_reg, 0, str1_reg);
-    ldx(chr2_reg, str1_reg, addr_diff);
-    cmpd(CCR0, chr2_reg, chr1_reg);
-    bne(CCR0, Lslow_case); // return chr1_reg
-    addi(str1_reg, str1_reg, 4*2);
-    bdnz(Lfast_loop);
-    addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing
-//23:
-   bind(Lslow_case);
-    mtctr(limit_reg);
-//24:
-   bind(Lslow_loop);
-    lhz(chr1_reg, 0, str1_reg);
-    lhzx(chr2_reg, str1_reg, addr_diff);
-    subf_(result_reg, chr2_reg, chr1_reg);
-    bne(CCR0, Ldone); // return chr1_reg
-    addi(str1_reg, str1_reg, 1*2);
-    bdnz(Lslow_loop);
-//30:
-   // If strings are equal up to min length, return the length difference.
-    mr(result_reg, cnt_diff);
-    nop(); // alignment
-//32:
-   // Otherwise, return the difference between the first mismatched chars.
-   bind(Ldone);
-}
-
-
-// Compare char[] arrays.
-//
-// str1_reg   USE only
-// str2_reg   USE only
-// cnt_reg    USE_DEF, due to tmp reg shortage
-// result_reg DEF only, might compromise USE only registers
-void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
-                                        Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
-                                        Register tmp5_reg) {
-
-  // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
-  assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
-  assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
-
-  // Offset 0 should be 32 byte aligned.
-  Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
-  Register index_reg = tmp5_reg;
-  Register cbc_iter  = tmp4_reg;
-
-  // 'cnt_reg' contains the number of characters in the string's character array for the
-  // pre-CompactStrings strings implementation and the number of bytes in the string's
-  // byte array for the CompactStrings strings implementation.
-  const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
-
-//-1:
-  dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-  dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-//1:
-  // cbc_iter: remaining characters after the '4 java characters per iteration' loop.
-  rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING
-  li(index_reg, 0); // init
-  li(result_reg, 0); // assume false
-  // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop).
-  srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4)
-
-  cmpwi(CCR1, cbc_iter, 0);             // CCR1 = (cbc_iter==0)
-  beq(CCR0, Linit_cbc);                 // too short
-    mtctr(tmp2_reg);
-//8:
-    bind(Lloop);
-      ldx(tmp1_reg, str1_reg, index_reg);
-      ldx(tmp2_reg, str2_reg, index_reg);
-      cmpd(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-      addi(index_reg, index_reg, 4*sizeof(jchar));
-      bdnz(Lloop);
-//14:
-  bind(Linit_cbc);
-  beq(CCR1, Ldone_true);
-    mtctr(cbc_iter);
-//16:
-    bind(Lcbc);
-      lhzx(tmp1_reg, str1_reg, index_reg);
-      lhzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-      addi(index_reg, index_reg, 1*sizeof(jchar));
-      bdnz(Lcbc);
-    nop();
-  bind(Ldone_true);
-  li(result_reg, 1);
-//24:
-  bind(Ldone_false);
-}
-
-
-void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
-                                           Register tmp1_reg, Register tmp2_reg) {
-  // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
-  assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
-  assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
-  assert(sizeof(jchar) == 2, "must be");
-  assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");
-
-  // 'cntval' contains the number of characters in the string's character array for the
-  // pre-CompactStrings strings implementation and the number of bytes in the string's
-  // byte array for the CompactStrings strings implementation.
-  cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings
-
-  Label Ldone_false;
-
-  if (cntval < 16) { // short case
-    if (cntval != 0) li(result_reg, 0); // assume false
-
-    const int num_bytes = cntval*sizeof(jchar);
-    int index = 0;
-    for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
-      ld(tmp1_reg, index, str1_reg);
-      ld(tmp2_reg, index, str2_reg);
-      cmpd(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    if (cntval & 2) {
-      lwz(tmp1_reg, index, str1_reg);
-      lwz(tmp2_reg, index, str2_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-      index += 4;
-    }
-    if (cntval & 1) {
-      lhz(tmp1_reg, index, str1_reg);
-      lhz(tmp2_reg, index, str2_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    // fallthrough: true
-  } else {
-    Label Lloop;
-    Register index_reg = tmp1_reg;
-    const int loopcnt = cntval/4;
-    assert(loopcnt > 0, "must be");
-    // Offset 0 should be 32 byte aligned.
-    //2:
-    dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-    dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-    li(tmp2_reg, loopcnt);
-    li(index_reg, 0); // init
-    li(result_reg, 0); // assume false
-    mtctr(tmp2_reg);
-    //8:
-    bind(Lloop);
-    ldx(R0, str1_reg, index_reg);
-    ldx(tmp2_reg, str2_reg, index_reg);
-    cmpd(CCR0, R0, tmp2_reg);
-    bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-    addi(index_reg, index_reg, 4*sizeof(jchar));
-    bdnz(Lloop);
-    //14:
-    if (cntval & 2) {
-      lwzx(R0, str1_reg, index_reg);
-      lwzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, R0, tmp2_reg);
-      bne(CCR0, Ldone_false);
-      if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
-    }
-    if (cntval & 1) {
-      lhzx(R0, str1_reg, index_reg);
-      lhzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, R0, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    // fallthru: true
-  }
-  li(result_reg, 1);
-  bind(Ldone_false);
-}
-
 #endif // Compiler2
 
 // Helpers for Intrinsic Emitters
 //
 // Revert the byte order of a 32bit value in a register
< prev index next >