jdk_hs Cdiff src/hotspot/cpu/aarch64/macroAssembler

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp


*** 5179,5299 ****
      cset(result, NE); // set true or false
  
    BIND(DONE);
  }
  
! // Compare Strings or char/byte arrays.
! 
! // is_string is true iff this is a string comparison.
! 
! // For Strings we're passed the address of the first characters in a1
! // and a2 and the length in cnt1.
! 
! // For byte and char arrays we're passed the arrays themselves and we
! // have to extract length fields and do null checks here.
! 
! // elem_size is the element size in bytes: either 1 or 2.
! 
! // There are two implementations.  For arrays >= 8 bytes, all
! // comparisons (including the final one, which may overlap) are
! // performed 8 bytes at a time.  For arrays < 8 bytes, we compare a
! // halfword, then a short, and then a byte.
! 
! void MacroAssembler::arrays_equals(Register a1, Register a2,
!                                    Register result, Register cnt1,
!                                    int elem_size, bool is_string)
  {
!   Label SAME, DONE, SHORT, NEXT_WORD, ONE;
    Register tmp1 = rscratch1;
    Register tmp2 = rscratch2;
    Register cnt2 = tmp2;  // cnt2 only used in array length compare
    int elem_per_word = wordSize/elem_size;
    int log_elem_size = exact_log2(elem_size);
    int length_offset = arrayOopDesc::length_offset_in_bytes();
    int base_offset
      = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
  
    assert(elem_size == 1 || elem_size == 2, "must be char or byte");
    assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
  
  #ifndef PRODUCT
    {
      const char kind = (elem_size == 2) ? 'U' : 'L';
      char comment[64];
!     snprintf(comment, sizeof comment, "%s%c%s {",
!              is_string ? "string_equals" : "array_equals",
!              kind, "{");
      BLOCK_COMMENT(comment);
    }
  #endif
! 
!   mov(result, false);
! 
!   if (!is_string) {
!     // if (a==a2)
      //     return true;
-     eor(rscratch1, a1, a2);
-     cbz(rscratch1, SAME);
      // if (a==null || a2==null)
      //     return false;
!     cbz(a1, DONE);
!     cbz(a2, DONE);
      // if (a1.length != a2.length)
      //      return false;
      ldrw(cnt1, Address(a1, length_offset));
      ldrw(cnt2, Address(a2, length_offset));
!     eorw(tmp1, cnt1, cnt2);
!     cbnzw(tmp1, DONE);
! 
      lea(a1, Address(a1, base_offset));
      lea(a2, Address(a2, base_offset));
-   }
- 
    // Check for short strings, i.e. smaller than wordSize.
    subs(cnt1, cnt1, elem_per_word);
    br(Assembler::LT, SHORT);
    // Main 8 byte comparison loop.
    bind(NEXT_WORD); {
      ldr(tmp1, Address(post(a1, wordSize)));
      ldr(tmp2, Address(post(a2, wordSize)));
      subs(cnt1, cnt1, elem_per_word);
!     eor(tmp1, tmp1, tmp2);
!     cbnz(tmp1, DONE);
    } br(GT, NEXT_WORD);
    // Last longword.  In the case where length == 4 we compare the
    // same longword twice, but that's still faster than another
    // conditional branch.
    // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
    // length == 4.
    if (log_elem_size > 0)
      lsl(cnt1, cnt1, log_elem_size);
!   ldr(tmp1, Address(a1, cnt1));
!   ldr(tmp2, Address(a2, cnt1));
    eor(tmp1, tmp1, tmp2);
    cbnz(tmp1, DONE);
    b(SAME);
  
    bind(SHORT);
    Label TAIL03, TAIL01;
  
!   tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
    {
      ldrw(tmp1, Address(post(a1, 4)));
      ldrw(tmp2, Address(post(a2, 4)));
      eorw(tmp1, tmp1, tmp2);
      cbnzw(tmp1, DONE);
    }
    bind(TAIL03);
!   tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
    {
      ldrh(tmp1, Address(post(a1, 2)));
      ldrh(tmp2, Address(post(a2, 2)));
      eorw(tmp1, tmp1, tmp2);
      cbnzw(tmp1, DONE);
    }
    bind(TAIL01);
!   if (elem_size == 1) { // Only needed when comparing byte arrays.
      tbz(cnt1, 0, SAME); // 0-1 bytes left.
      {
        ldrb(tmp1, a1);
        ldrb(tmp2, a2);
        eorw(tmp1, tmp1, tmp2);
--- 5179,5467 ----
      cset(result, NE); // set true or false
  
    BIND(DONE);
  }
  
! // Compare two byte or char arrays for equality.
! //
! // a1 and a2 are the arrays themselves; the length fields are loaded
! // and the null checks are done here.  result is set to true if the
! // arrays are equal and to false otherwise.
! // elem_size is the element size in bytes: either 1 or 2.
! // cnt1, tmp3, tmp4, tmp5, rscratch1 and rscratch2 are used as
! // temporaries; a1 and a2 may be overwritten as well.
! //
! // There are two implementations, selected by UseSimpleArrayEquals:
! //  - the simple one compares 8 bytes at a time (the final comparison
! //    may overlap the previous one); arrays shorter than 8 bytes are
! //    compared as a word, then a halfword, and then a byte.
! //  - the other one handles arrays of at most 8 bytes with a single
! //    8-byte comparison from which the bytes past the end are shifted
! //    out, compares 16 bytes per loop iteration otherwise, and calls
! //    the large_array_equals stub once cnt1 reaches stubBytesThreshold.
! void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
!                                    Register tmp4, Register tmp5, Register result,
!                                    Register cnt1, int elem_size)
  {
!   Label DONE;
    Register tmp1 = rscratch1;
    Register tmp2 = rscratch2;
    Register cnt2 = tmp2;  // cnt2 only used in array length compare
    int elem_per_word = wordSize/elem_size;
    int log_elem_size = exact_log2(elem_size);
    int length_offset = arrayOopDesc::length_offset_in_bytes();
    int base_offset
      = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+   int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
  
    assert(elem_size == 1 || elem_size == 2, "must be char or byte");
    assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
  
  #ifndef PRODUCT
    {
      const char kind = (elem_size == 2) ? 'U' : 'L';
      char comment[64];
!     snprintf(comment, sizeof comment, "array_equals%c{", kind);
      BLOCK_COMMENT(comment);
    }
  #endif
!   if (UseSimpleArrayEquals) {
!     Label NEXT_WORD, SHORT, SAME, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
!     // if (a1==a2)
      //     return true;
      // if (a==null || a2==null)
      //     return false;
!     // a1 & a2 == 0 means (some-pointer is null) or
!     // (very-rare-or-even-probably-impossible-pointer-values)
!     // so, we can save one branch in most cases
!     eor(rscratch1, a1, a2);
!     tst(a1, a2);
!     mov(result, false);
!     cbz(rscratch1, SAME);
!     br(EQ, A_MIGHT_BE_NULL);
      // if (a1.length != a2.length)
      //      return false;
+     bind(A_IS_NOT_NULL);
      ldrw(cnt1, Address(a1, length_offset));
      ldrw(cnt2, Address(a2, length_offset));
!     eorw(tmp5, cnt1, cnt2);
!     cbnzw(tmp5, DONE);
      lea(a1, Address(a1, base_offset));
      lea(a2, Address(a2, base_offset));
      // Check for short strings, i.e. smaller than wordSize.
      subs(cnt1, cnt1, elem_per_word);
      br(Assembler::LT, SHORT);
      // Main 8 byte comparison loop.
      bind(NEXT_WORD); {
        ldr(tmp1, Address(post(a1, wordSize)));
        ldr(tmp2, Address(post(a2, wordSize)));
        subs(cnt1, cnt1, elem_per_word);
!       eor(tmp5, tmp1, tmp2);
!       cbnz(tmp5, DONE);
      } br(GT, NEXT_WORD);
      // Last longword.  In the case where length == 4 we compare the
      // same longword twice, but that's still faster than another
      // conditional branch.
      // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
      // length == 4.
      if (log_elem_size > 0)
        lsl(cnt1, cnt1, log_elem_size);
!     ldr(tmp3, Address(a1, cnt1));
!     ldr(tmp4, Address(a2, cnt1));
!     eor(tmp5, tmp3, tmp4);
!     cbnz(tmp5, DONE);
!     b(SAME);
!     bind(A_MIGHT_BE_NULL);
!     // in case both a1 and a2 are not-null, proceed with loads
!     cbz(a1, DONE);
!     cbz(a2, DONE);
!     b(A_IS_NOT_NULL);
!     bind(SHORT);
! 
!     tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
!     {
!       ldrw(tmp1, Address(post(a1, 4)));
!       ldrw(tmp2, Address(post(a2, 4)));
!       eorw(tmp5, tmp1, tmp2);
!       cbnzw(tmp5, DONE);
!     }
!     bind(TAIL03);
!     tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
!     {
!       ldrh(tmp3, Address(post(a1, 2)));
!       ldrh(tmp4, Address(post(a2, 2)));
!       eorw(tmp5, tmp3, tmp4);
!       cbnzw(tmp5, DONE);
!     }
!     bind(TAIL01);
!     if (elem_size == 1) { // Only needed when comparing byte arrays.
!       tbz(cnt1, 0, SAME); // 0-1 bytes left.
!       {
!         ldrb(tmp1, a1);
!         ldrb(tmp2, a2);
!         eorw(tmp5, tmp1, tmp2);
!         cbnzw(tmp5, DONE);
!       }
!     }
!     bind(SAME);
!     mov(result, true);
!   } else {
!     Label NEXT_DWORD, A_IS_NULL, SHORT, TAIL, TAIL2, STUB, EARLY_OUT,
!         CSET_EQ, LAST_CHECK, LEN_IS_ZERO, SAME;
!     cbz(a1, A_IS_NULL);
!     ldrw(cnt1, Address(a1, length_offset));
!     cbz(a2, A_IS_NULL);
!     ldrw(cnt2, Address(a2, length_offset));
!     mov(result, false);
!     // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
!     // faster to perform another branch before comparing a1 and a2
!     cmp(cnt1, elem_per_word);
!     br(LE, SHORT); // short or same
!     cmp(a1, a2);
!     br(EQ, SAME);
!     ldr(tmp3, Address(pre(a1, base_offset)));
!     // NOTE(review): cnt1 is still an element count here (it is only
!     // converted to bytes under STUB), although the threshold is named
!     // in bytes -- confirm the intended units for char arrays.
!     cmp(cnt1, stubBytesThreshold);
!     br(GE, STUB);
!     ldr(tmp4, Address(pre(a2, base_offset)));
!     // tmp5 = -(length in bits); used below as a shift amount to drop
!     // the part of the last loaded word that lies past the array end
!     // (presumably relies on lslv taking the shift amount modulo 64).
!     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
!     cmp(cnt2, cnt1);
!     br(NE, DONE);
! 
!     // Main 16 byte comparison loop with 2 exits
!     bind(NEXT_DWORD); {
!       ldr(tmp1, Address(pre(a1, wordSize)));
!       ldr(tmp2, Address(pre(a2, wordSize)));
!       subs(cnt1, cnt1, 2 * elem_per_word);
!       br(LE, TAIL);
!       eor(tmp4, tmp3, tmp4);
!       cbnz(tmp4, DONE);
!       ldr(tmp3, Address(pre(a1, wordSize)));
!       ldr(tmp4, Address(pre(a2, wordSize)));
!       cmp(cnt1, elem_per_word);
!       br(LE, TAIL2);
!       cmp(tmp1, tmp2);
!     } br(EQ, NEXT_DWORD);
!     b(DONE);
! 
!     bind(TAIL);
!     // tmp3/tmp4 hold a full word pair, tmp1/tmp2 the last (possibly
!     // partial) one: combine both differences and test them at once.
!     eor(tmp4, tmp3, tmp4);
!     eor(tmp2, tmp1, tmp2);
!     lslv(tmp2, tmp2, tmp5);
!     orr(tmp5, tmp4, tmp2);
!     cmp(tmp5, zr);
!     b(CSET_EQ);
! 
!     bind(TAIL2);
!     // tmp1/tmp2 hold a full word pair; the last (possibly partial)
!     // pair in tmp3/tmp4 is checked at LAST_CHECK.
!     eor(tmp2, tmp1, tmp2);
!     cbnz(tmp2, DONE);
!     b(LAST_CHECK);
! 
!     bind(STUB);
!     ldr(tmp4, Address(pre(a2, base_offset)));
!     cmp(cnt2, cnt1);
!     br(NE, DONE);
!     if (elem_size == 2) { // convert to byte counter
!       lsl(cnt1, cnt1, 1);
!     }
!     eor(tmp5, tmp3, tmp4);
!     cbnz(tmp5, DONE);
!     RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
!     assert(stub.target() != NULL, "array_equals_long stub has not been generated");
!     trampoline_call(stub);
!     b(DONE);
! 
!     bind(SAME);
!     mov(result, true);
!     b(DONE);
!     bind(A_IS_NULL);
!     // a1 or a2 is null. if a1 == a2 then return true. else return false
!     cmp(a1, a2);
!     b(CSET_EQ);
!     // NOTE(review): no branch to EARLY_OUT is visible in this function
!     // and the preceding instruction is an unconditional branch, so
!     // this block looks unreachable -- confirm and remove if so.
!     bind(EARLY_OUT);
!     // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
!     // so, if a2 == null => return false(0), else return true, so we can return a2
!     mov(result, a2);
!     b(DONE);
!     bind(LEN_IS_ZERO);
!     cmp(cnt2, zr);
!     b(CSET_EQ);
!     bind(SHORT);
!     // The length comparison in the main path is skipped when branching
!     // here, so do it now: arrays of different lengths are never equal
!     // (result is already false).  Without it only the first cnt1
!     // elements would be compared.
!     cmp(cnt2, cnt1);
!     br(NE, DONE);
!     cbz(cnt1, LEN_IS_ZERO);
!     sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
!     ldr(tmp3, Address(a1, base_offset));
!     ldr(tmp4, Address(a2, base_offset));
!     bind(LAST_CHECK);
!     eor(tmp4, tmp3, tmp4);
!     lslv(tmp5, tmp4, tmp5);
!     cmp(tmp5, zr);
!     bind(CSET_EQ);
!     cset(result, EQ);
!   }
! 
!   // That's it.
!   bind(DONE);
! 
!   BLOCK_COMMENT("} array_equals");
! }
! 
! // Compare Strings
! 
! // For Strings we're passed the address of the first characters in a1
! // and a2 and the length in cnt1 (the code below treats cnt1 as a byte
! // count).
! // elem_size is the element size in bytes: either 1 or 2.
! // There are two implementations.  For strings >= 8 bytes, all
! // comparisons (including the final one, which may overlap) are
! // performed 8 bytes at a time.  For strings < 8 bytes, we compare a
! // word, then a halfword, and then a byte.
! 
! void MacroAssembler::string_equals(Register a1, Register a2,
!                                    Register result, Register cnt1, int elem_size)
! {
!   Label SAME, DONE, SHORT, NEXT_WORD;
!   Register tmp1 = rscratch1;
!   Register tmp2 = rscratch2;
!   Register cnt2 = tmp2;  // cnt2 only used in array length compare
! 
!   assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
!   assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
! 
! #ifndef PRODUCT
!   {
!     const char kind = (elem_size == 2) ? 'U' : 'L';
!     char comment[64];
!     snprintf(comment, sizeof comment, "{string_equals%c", kind);
!     BLOCK_COMMENT(comment);
!   }
! #endif
! 
!   mov(result, false);
! 
!   // Check for short strings, i.e. smaller than wordSize.
!   subs(cnt1, cnt1, wordSize);
!   br(Assembler::LT, SHORT);
!   // Main 8 byte comparison loop.
!   bind(NEXT_WORD); {
!     ldr(tmp1, Address(post(a1, wordSize)));
!     ldr(tmp2, Address(post(a2, wordSize)));
!     subs(cnt1, cnt1, wordSize);
      eor(tmp1, tmp1, tmp2);
      cbnz(tmp1, DONE);
+   } br(GT, NEXT_WORD);
+   // Last longword.  In the case where the length is exactly 8 bytes
+   // we compare the same longword twice, but that's still faster than
+   // another conditional branch.
+   // cnt1 is a byte count here, so it is in the range [-8, 0]; -8 only
+   // happens when the length is exactly 8 bytes.
+   ldr(tmp1, Address(a1, cnt1));
+   ldr(tmp2, Address(a2, cnt1));
+   eor(tmp2, tmp1, tmp2);
+   cbnz(tmp2, DONE);
    b(SAME);
  
    bind(SHORT);
    Label TAIL03, TAIL01;
  
!   tbz(cnt1, 2, TAIL03); // 0-7 bytes left.
    {
      ldrw(tmp1, Address(post(a1, 4)));
      ldrw(tmp2, Address(post(a2, 4)));
      eorw(tmp1, tmp1, tmp2);
      cbnzw(tmp1, DONE);
    }
    bind(TAIL03);
!   tbz(cnt1, 1, TAIL01); // 0-3 bytes left.
    {
      ldrh(tmp1, Address(post(a1, 2)));
      ldrh(tmp2, Address(post(a2, 2)));
      eorw(tmp1, tmp1, tmp2);
      cbnzw(tmp1, DONE);
    }
    bind(TAIL01);
!   if (elem_size == 1) { // Only needed when comparing 1-byte elements
      tbz(cnt1, 0, SAME); // 0-1 bytes left.
      {
        ldrb(tmp1, a1);
        ldrb(tmp2, a2);
        eorw(tmp1, tmp1, tmp2);
*** 5304,5314 ****
    bind(SAME);
    mov(result, true);
  
    // That's it.
    bind(DONE);
!   BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
  }
  
  
  // The size of the blocks erased by the zero_blocks stub.  We must
  // handle anything smaller than this ourselves in zero_words().
--- 5472,5482 ----
    bind(SAME);
    mov(result, true);
  
    // That's it.
    bind(DONE);
!   BLOCK_COMMENT("} string_equals");
  }
  
  
  // The size of the blocks erased by the zero_blocks stub.  We must
  // handle anything smaller than this ourselves in zero_words().
< prev index next >