< prev index next >
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Print this page
@@ -3886,10 +3886,132 @@
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
return entry;
}
+
+ // Entry point for the large-array equality stub specialized for byte
+ // arrays (element size 1).
+ address generate_large_array_equals_byte() {
+ return generate_large_array_equals(1);
+ }
+
+ // Entry point for the large-array equality stub specialized for char
+ // arrays (element size 2).
+ address generate_large_array_equals_char() {
+ return generate_large_array_equals(2);
+ }
+
+ // Generates a stub that compares two large arrays for equality.
+ //
+ // Register contract (established by the caller before jumping here):
+ //   a1     = r1 - array1 data address
+ //   a2     = r2 - array2 data address
+ //   result = r0 - return value; already contains "false" on entry and is
+ //                 set to "true" only when all compared words match
+ //   cnt1   = r4 - number of elements left to check, already reduced by
+ //                 elem_per_word by the caller
+ //
+ // elem_size is 1 (byte arrays) or 2 (char arrays); it only affects the
+ // element<->byte count conversions, the comparison itself is word-wise.
+ address generate_large_array_equals(int elem_size) {
+ StubCodeMark mark(this, "StubRoutines", elem_size == 1
+ ? "large_array_equals_byte"
+ : "large_array_equals_char");
+ Register a1 = r1, a2 = r2, result = r0, cnt1 = r4, tmp1 = rscratch1,
+ tmp2 = rscratch2, tmp3 = r6, tmp4 = r7;
+ Label LARGE_LOOP, NOT_EQUAL;
+ // Number of array elements packed into one 64-bit word.
+ int elem_per_word = wordSize/elem_size;
+ // Main-loop exit threshold, in elements. Stay in LARGE_LOOP only while
+ // more than this many elements remain, so the prfm hints below never
+ // reach past the arrays -- presumably why SoftwarePrefetchHintDistance
+ // feeds into it (TODO confirm against the intrinsic expansion); the
+ // MAX(80, ...) keeps at least 64+16 bytes for the unrolled body plus the
+ // scalar path's in-flight loads.
+ int branchThreshold = MAX(80, SoftwarePrefetchHintDistance)/elem_size - elem_per_word;
+ // tmp3/tmp4 are not scratch registers, so the scalar path must preserve
+ // them across the stub.
+ RegSet spilled_regs = RegSet::of(tmp3, tmp4);
+
+ assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2, tmp3, tmp4);
+
+ __ align(CodeEntryAlignment);
+ address entry = __ pc();
+ __ enter();
+
+ if (!UseSIMDForArrayEquals) {
+ // pre-loop: prime the software pipeline with the first two words of
+ // each array; LARGE_LOOP below compares values loaded on the previous
+ // round while issuing the loads for the next one.
+ __ push(spilled_regs, sp);
+ __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
+ __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
+ }
+ __ bind(LARGE_LOOP); // unrolled to 64 bytes loop with possible prefetching
+ if (SoftwarePrefetchHintDistance >= 0) {
+ __ prfm(Address(a1, SoftwarePrefetchHintDistance));
+ __ prfm(Address(a2, SoftwarePrefetchHintDistance));
+ }
+ if (UseSIMDForArrayEquals) {
+ // SIMD path: load 64 bytes from each array, XOR corresponding vectors,
+ // then OR-reduce the four XOR results -- any set bit means a mismatch.
+ __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
+ __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
+ __ eor(v0, __ T2D, v0, v4);
+ __ eor(v1, __ T2D, v1, v5);
+ __ eor(v2, __ T2D, v2, v6);
+ __ eor(v3, __ T2D, v3, v7);
+
+ __ orr(v0, __ T2D, v0, v1);
+ __ orr(v1, __ T2D, v2, v3);
+ __ orr(v0, __ T2D, v0, v1);
+
+ // Extract both 64-bit lanes of the reduction; non-zero => arrays differ.
+ __ umov(tmp1, v0, __ D, 0);
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ umov(tmp1, v0, __ D, 1);
+ __ cbnz(tmp1, NOT_EQUAL);
+ // 64 bytes consumed this iteration.
+ __ sub(cnt1, cnt1, 64/elem_size);
+ __ cmp(cnt1, branchThreshold);
+ __ br(__ GT, LARGE_LOOP);
+ } else {
+ // Scalar path: 64-byte unrolled, software-pipelined loop. Each eor
+ // compares a word pair loaded earlier, while the interleaved ldr's
+ // fetch the next pair, hiding load latency behind the compares.
+ __ eor(tmp1, tmp1, tmp2);
+ __ ldr(tmp2, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ ldr(tmp1, Address(__ post(a1, wordSize)));
+ __ eor(tmp3, tmp3, tmp4);
+ __ ldr(tmp4, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp3, NOT_EQUAL);
+ __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+ __ eor(tmp1, tmp1, tmp2);
+ __ ldr(tmp2, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ ldr(tmp1, Address(__ post(a1, wordSize)));
+ __ eor(tmp3, tmp3, tmp4);
+ __ ldr(tmp4, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp3, NOT_EQUAL);
+ __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+ __ eor(tmp1, tmp1, tmp2);
+ __ ldr(tmp2, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ ldr(tmp1, Address(__ post(a1, wordSize)));
+ __ eor(tmp3, tmp3, tmp4);
+ __ ldr(tmp4, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp3, NOT_EQUAL);
+ __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+ // loads below are for next loop iteration
+ __ eor(tmp1, tmp1, tmp2);
+ __ ldr(tmp2, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ ldr(tmp1, Address(__ post(a1, wordSize)));
+ __ eor(tmp3, tmp3, tmp4);
+ __ ldr(tmp4, Address(__ post(a2, wordSize)));
+ __ cbnz(tmp3, NOT_EQUAL);
+ __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+ // 8 words (64 bytes) compared this iteration.
+ __ sub(cnt1, cnt1, 8 * elem_per_word);
+ // run this loop while there is enough memory left to prefetch safely
+ // (but at least 64+16 bytes).
+ __ cmp(cnt1, branchThreshold);
+ __ br(Assembler::GT, LARGE_LOOP);
+ // Both a1 and a2 have been advanced wordSize*2 past the last compared
+ // word, and tmp1-tmp4 still hold not-yet-checked values loaded by the
+ // final iteration. Check them in this post-loop and update cnt1
+ // accordingly.
+ __ eor(tmp1, tmp1, tmp2);
+ __ cbnz(tmp1, NOT_EQUAL);
+ __ eor(tmp3, tmp3, tmp4);
+ __ cbnz(tmp3, NOT_EQUAL);
+ __ sub(cnt1, cnt1, 2 * elem_per_word);
+ }
+
+ // Fall-through: everything compared so far matched. On any mismatch we
+ // branch to NOT_EQUAL past this mov, leaving the caller-provided "false"
+ // in result.
+ __ mov(result, true);
+ __ bind(NOT_EQUAL);
+ if (!UseSIMDForArrayEquals) {
+ __ pop(spilled_regs, sp);
+ }
+ __ leave();
+ __ ret(lr);
+ return entry;
+ }
+
/**
* Arguments:
*
* Input:
* c_rarg0 - current state address
@@ -4968,10 +5090,14 @@
generate_arraycopy_stubs();
// has negatives stub for large arrays.
StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
+ // array equals stub for large arrays.
+ StubRoutines::aarch64::_large_array_equals_byte = generate_large_array_equals_byte();
+ StubRoutines::aarch64::_large_array_equals_char = generate_large_array_equals_char();
+
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
if (UseSquareToLenIntrinsic) {
< prev index next >