< prev index next >
src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Print this page
*** 5179,5299 ****
cset(result, NE); // set true or false
BIND(DONE);
}
! // Compare Strings or char/byte arrays.
!
! // is_string is true iff this is a string comparison.
!
! // For Strings we're passed the address of the first characters in a1
! // and a2 and the length in cnt1.
!
! // For byte and char arrays we're passed the arrays themselves and we
! // have to extract length fields and do null checks here.
!
! // elem_size is the element size in bytes: either 1 or 2.
!
! // There are two implementations. For arrays >= 8 bytes, all
! // comparisons (including the final one, which may overlap) are
! // performed 8 bytes at a time. For arrays < 8 bytes, we compare a
! // halfword, then a short, and then a byte.
!
! void MacroAssembler::arrays_equals(Register a1, Register a2,
! Register result, Register cnt1,
! int elem_size, bool is_string)
{
! Label SAME, DONE, SHORT, NEXT_WORD, ONE;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
Register cnt2 = tmp2; // cnt2 only used in array length compare
int elem_per_word = wordSize/elem_size;
int log_elem_size = exact_log2(elem_size);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
#ifndef PRODUCT
{
const char kind = (elem_size == 2) ? 'U' : 'L';
char comment[64];
! snprintf(comment, sizeof comment, "%s%c%s {",
! is_string ? "string_equals" : "array_equals",
! kind, "{");
BLOCK_COMMENT(comment);
}
#endif
!
! mov(result, false);
!
! if (!is_string) {
! // if (a==a2)
// return true;
- eor(rscratch1, a1, a2);
- cbz(rscratch1, SAME);
// if (a==null || a2==null)
// return false;
! cbz(a1, DONE);
! cbz(a2, DONE);
// if (a1.length != a2.length)
// return false;
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
! eorw(tmp1, cnt1, cnt2);
! cbnzw(tmp1, DONE);
!
lea(a1, Address(a1, base_offset));
lea(a2, Address(a2, base_offset));
- }
-
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT);
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
ldr(tmp1, Address(post(a1, wordSize)));
ldr(tmp2, Address(post(a2, wordSize)));
subs(cnt1, cnt1, elem_per_word);
! eor(tmp1, tmp1, tmp2);
! cbnz(tmp1, DONE);
} br(GT, NEXT_WORD);
// Last longword. In the case where length == 4 we compare the
// same longword twice, but that's still faster than another
// conditional branch.
// cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
// length == 4.
if (log_elem_size > 0)
lsl(cnt1, cnt1, log_elem_size);
! ldr(tmp1, Address(a1, cnt1));
! ldr(tmp2, Address(a2, cnt1));
eor(tmp1, tmp1, tmp2);
cbnz(tmp1, DONE);
b(SAME);
bind(SHORT);
Label TAIL03, TAIL01;
! tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
{
ldrw(tmp1, Address(post(a1, 4)));
ldrw(tmp2, Address(post(a2, 4)));
eorw(tmp1, tmp1, tmp2);
cbnzw(tmp1, DONE);
}
bind(TAIL03);
! tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
{
ldrh(tmp1, Address(post(a1, 2)));
ldrh(tmp2, Address(post(a2, 2)));
eorw(tmp1, tmp1, tmp2);
cbnzw(tmp1, DONE);
}
bind(TAIL01);
! if (elem_size == 1) { // Only needed when comparing byte arrays.
tbz(cnt1, 0, SAME); // 0-1 bytes left.
{
ldrb(tmp1, a1);
ldrb(tmp2, a2);
eorw(tmp1, tmp1, tmp2);
--- 5179,5467 ----
cset(result, NE); // set true or false
BIND(DONE);
}
! // Compare two byte or char arrays for equality.
! //
! // a1, a2   - the arrays themselves (either may be null). The length
! //            fields are read and the null checks are done here. Both
! //            registers may be modified.
! // tmp3, tmp4, tmp5 - temporaries supplied by the caller, clobbered.
! // result   - set to true (1) if the arrays are equal, false (0) otherwise.
! // cnt1     - clobbered; loaded with the length of a1.
! // elem_size is the element size in bytes: either 1 or 2.
! //
! // rscratch1 and rscratch2 are used as additional temporaries.
! //
! // Two code shapes can be emitted:
! //  - UseSimpleArrayEquals: an 8-bytes-per-iteration loop whose final
! //    (possibly overlapping) comparison is also 8 bytes wide; arrays
! //    shorter than 8 bytes are compared as a word, a halfword and a byte.
! //  - otherwise: a 16-bytes-per-iteration loop for medium sized arrays, a
! //    single masked 8 byte comparison for arrays of at most 8 bytes, and a
! //    call to the large_array_equals stub once the length reaches
! //    stubBytesThreshold.
! //
! // NOTE(review): tmp3, tmp4 and tmp5 are not covered by the
! // assert_different_registers check below - confirm that all callers pass
! // registers distinct from the other arguments and from rscratch1/2.
! void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
! Register tmp4, Register tmp5, Register result,
! Register cnt1, int elem_size)
{
! Label DONE;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
Register cnt2 = tmp2; // cnt2 only used in array length compare
int elem_per_word = wordSize/elem_size;
int log_elem_size = exact_log2(elem_size);
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+ int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
#ifndef PRODUCT
{
const char kind = (elem_size == 2) ? 'U' : 'L';
char comment[64];
! snprintf(comment, sizeof comment, "array_equals%c{", kind);
BLOCK_COMMENT(comment);
}
#endif
! if (UseSimpleArrayEquals) {
! Label NEXT_WORD, SHORT, SAME, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
! // if (a1==a2)
// return true;
// if (a==null || a2==null)
// return false;
! // a1 & a2 == 0 means (some-pointer is null) or
! // (very-rare-or-even-probably-impossible-pointer-values)
! // so, we can save one branch in most cases
! eor(rscratch1, a1, a2);
! tst(a1, a2);
! mov(result, false);
! cbz(rscratch1, SAME);
! br(EQ, A_MIGHT_BE_NULL);
// if (a1.length != a2.length)
// return false;
+ bind(A_IS_NOT_NULL);
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
! eorw(tmp5, cnt1, cnt2);
! cbnzw(tmp5, DONE);
lea(a1, Address(a1, base_offset));
lea(a2, Address(a2, base_offset));
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT);
// Main 8 byte comparison loop.
bind(NEXT_WORD); {
ldr(tmp1, Address(post(a1, wordSize)));
ldr(tmp2, Address(post(a2, wordSize)));
subs(cnt1, cnt1, elem_per_word);
! eor(tmp5, tmp1, tmp2);
! cbnz(tmp5, DONE);
} br(GT, NEXT_WORD);
// Last longword. In the case where length == 4 we compare the
// same longword twice, but that's still faster than another
// conditional branch.
// cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
// length == 4.
if (log_elem_size > 0)
lsl(cnt1, cnt1, log_elem_size);
! ldr(tmp3, Address(a1, cnt1));
! ldr(tmp4, Address(a2, cnt1));
! eor(tmp5, tmp3, tmp4);
! cbnz(tmp5, DONE);
! b(SAME);
! bind(A_MIGHT_BE_NULL);
! // in case both a1 and a2 are not-null, proceed with loads
! cbz(a1, DONE);
! cbz(a2, DONE);
! b(A_IS_NOT_NULL);
! bind(SHORT);
!
! tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
! {
! ldrw(tmp1, Address(post(a1, 4)));
! ldrw(tmp2, Address(post(a2, 4)));
! eorw(tmp5, tmp1, tmp2);
! cbnzw(tmp5, DONE);
! }
! bind(TAIL03);
! tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
! {
! ldrh(tmp3, Address(post(a1, 2)));
! ldrh(tmp4, Address(post(a2, 2)));
! eorw(tmp5, tmp3, tmp4);
! cbnzw(tmp5, DONE);
! }
! bind(TAIL01);
! if (elem_size == 1) { // Only needed when comparing byte arrays.
! tbz(cnt1, 0, SAME); // 0-1 bytes left.
! {
! ldrb(tmp1, a1);
! ldrb(tmp2, a2);
! eorw(tmp5, tmp1, tmp2);
! cbnzw(tmp5, DONE);
! }
! }
! bind(SAME);
! mov(result, true);
! } else {
! Label NEXT_DWORD, A_IS_NULL, SHORT, TAIL, TAIL2, STUB,
! CSET_EQ, LAST_CHECK, SAME;
! cbz(a1, A_IS_NULL);
! ldrw(cnt1, Address(a1, length_offset));
! cbz(a2, A_IS_NULL);
! ldrw(cnt2, Address(a2, length_offset));
! mov(result, false);
! // on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
! // faster to perform another branch before comparing a1 and a2
! cmp(cnt1, elem_per_word);
! br(LE, SHORT); // short or same
! cmp(a1, a2);
! br(EQ, SAME);
! ldr(tmp3, Address(pre(a1, base_offset)));
! // NOTE(review): cnt1 still holds an element count here (it is only
! // converted to a byte count in STUB below), although the threshold is
! // named in bytes - confirm the intended unit for elem_size == 2.
! cmp(cnt1, stubBytesThreshold);
! br(GE, STUB);
! ldr(tmp4, Address(pre(a2, base_offset)));
! // tmp5 = -(length in bits). It is used below as a left-shift amount;
! // the variable shift only honours the amount modulo 64, so the shift
! // drops the part of the last loaded word that lies beyond the end of
! // the array (assuming little-endian loads).
! sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
! cmp(cnt2, cnt1);
! br(NE, DONE);
!
! // Main 16 byte comparison loop with 2 exits
! bind(NEXT_DWORD); {
! ldr(tmp1, Address(pre(a1, wordSize)));
! ldr(tmp2, Address(pre(a2, wordSize)));
! subs(cnt1, cnt1, 2 * elem_per_word);
! br(LE, TAIL);
! eor(tmp4, tmp3, tmp4);
! cbnz(tmp4, DONE);
! ldr(tmp3, Address(pre(a1, wordSize)));
! ldr(tmp4, Address(pre(a2, wordSize)));
! cmp(cnt1, elem_per_word);
! br(LE, TAIL2);
! cmp(tmp1, tmp2);
! } br(EQ, NEXT_DWORD);
! b(DONE);
!
! // tmp3/tmp4 hold a full word, tmp1/tmp2 hold the last (possibly
! // partial) word: compare the former completely and the latter masked.
! bind(TAIL);
! eor(tmp4, tmp3, tmp4);
! eor(tmp2, tmp1, tmp2);
! lslv(tmp2, tmp2, tmp5);
! orr(tmp5, tmp4, tmp2);
! cmp(tmp5, zr);
! b(CSET_EQ);
!
! // tmp1/tmp2 hold a full word, tmp3/tmp4 hold the last (possibly
! // partial) word, which is checked at LAST_CHECK.
! bind(TAIL2);
! eor(tmp2, tmp1, tmp2);
! cbnz(tmp2, DONE);
! b(LAST_CHECK);
!
! bind(STUB);
! ldr(tmp4, Address(pre(a2, base_offset)));
! cmp(cnt2, cnt1);
! br(NE, DONE);
! if (elem_size == 2) { // convert to byte counter
! lsl(cnt1, cnt1, 1);
! }
! eor(tmp5, tmp3, tmp4);
! cbnz(tmp5, DONE);
! RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
! assert(stub.target() != NULL, "array_equals_long stub has not been generated");
! trampoline_call(stub);
! b(DONE);
!
! bind(SAME);
! mov(result, true);
! b(DONE);
! bind(A_IS_NULL);
! // a1 or a2 is null. if a1 == a2 then return true. else return false
! cmp(a1, a2);
! b(CSET_EQ);
! bind(SHORT);
! // Arrays of different length are never equal. result is already false
! // here, so a length mismatch can leave directly.
! cmp(cnt2, cnt1);
! br(NE, DONE);
! // Both arrays are empty.
! cbz(cnt1, SAME);
! sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
! ldr(tmp3, Address(a1, base_offset));
! ldr(tmp4, Address(a2, base_offset));
! bind(LAST_CHECK);
! eor(tmp4, tmp3, tmp4);
! lslv(tmp5, tmp4, tmp5);
! cmp(tmp5, zr);
! bind(CSET_EQ);
! cset(result, EQ);
! }
!
! // That's it.
! bind(DONE);
!
! BLOCK_COMMENT("} array_equals");
! }
!
! // Compare Strings
!
! // For Strings we're passed the address of the first characters in a1
! // and a2 and the length, counted in bytes, in cnt1.
! // elem_size is the element size in bytes: either 1 or 2.
! // There are two implementations. For strings >= 8 bytes, all
! // comparisons (including the final one, which may overlap) are
! // performed 8 bytes at a time. For strings < 8 bytes, we compare a
! // word, then a halfword, and then a byte.
!
! void MacroAssembler::string_equals(Register a1, Register a2,
! Register result, Register cnt1, int elem_size)
! {
! Label SAME, DONE, SHORT, NEXT_WORD;
! Register tmp1 = rscratch1;
! Register tmp2 = rscratch2;
! Register cnt2 = tmp2; // cnt2 only used in array length compare
!
! assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
! assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
!
! #ifndef PRODUCT
! {
! const char kind = (elem_size == 2) ? 'U' : 'L';
! char comment[64];
! snprintf(comment, sizeof comment, "{string_equals%c", kind);
! BLOCK_COMMENT(comment);
! }
! #endif
!
! mov(result, false);
!
! // Check for short strings, i.e. smaller than wordSize.
! subs(cnt1, cnt1, wordSize);
! br(Assembler::LT, SHORT);
! // Main 8 byte comparison loop.
! bind(NEXT_WORD); {
! ldr(tmp1, Address(post(a1, wordSize)));
! ldr(tmp2, Address(post(a2, wordSize)));
! subs(cnt1, cnt1, wordSize);
eor(tmp1, tmp1, tmp2);
cbnz(tmp1, DONE);
+ } br(GT, NEXT_WORD);
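+ // Last longword. cnt1 is a byte count here, so after the loop it is in
+ // the range [-8, 0] and a1/a2 point just past the last word compared.
+ // Loading at (a1 + cnt1) therefore covers the final 8 bytes and may
+ // overlap bytes that were already checked. In the case where the length
+ // is exactly 8 bytes (cnt1 == -8) we compare the same longword twice,
+ // but that's still faster than another conditional branch.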
+ ldr(tmp1, Address(a1, cnt1));
+ ldr(tmp2, Address(a2, cnt1));
+ eor(tmp2, tmp1, tmp2);
+ cbnz(tmp2, DONE);
b(SAME);
bind(SHORT);
Label TAIL03, TAIL01;
! tbz(cnt1, 2, TAIL03); // 0-7 bytes left.
{
ldrw(tmp1, Address(post(a1, 4)));
ldrw(tmp2, Address(post(a2, 4)));
eorw(tmp1, tmp1, tmp2);
cbnzw(tmp1, DONE);
}
bind(TAIL03);
! tbz(cnt1, 1, TAIL01); // 0-3 bytes left.
{
ldrh(tmp1, Address(post(a1, 2)));
ldrh(tmp2, Address(post(a2, 2)));
eorw(tmp1, tmp1, tmp2);
cbnzw(tmp1, DONE);
}
bind(TAIL01);
! if (elem_size == 1) { // Only needed when comparing 1-byte elements
tbz(cnt1, 0, SAME); // 0-1 bytes left.
{
ldrb(tmp1, a1);
ldrb(tmp2, a2);
eorw(tmp1, tmp1, tmp2);
*** 5304,5314 ****
bind(SAME);
mov(result, true);
// That's it.
bind(DONE);
! BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
}
// The size of the blocks erased by the zero_blocks stub. We must
// handle anything smaller than this ourselves in zero_words().
--- 5472,5482 ----
bind(SAME);
mov(result, true);
// That's it.
bind(DONE);
! BLOCK_COMMENT("} string_equals");
}
// The size of the blocks erased by the zero_blocks stub. We must
// handle anything smaller than this ourselves in zero_words().
< prev index next >