--- old/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	2017-10-30 15:50:37.636784004 +0300
+++ new/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	2017-10-30 15:50:37.504786077 +0300
@@ -3888,6 +3888,103 @@
     __ ret(lr);
     return entry;
   }
+
+  address generate_large_array_equals_byte() {
+    return generate_large_array_equals(1);
+  }
+
+  address generate_large_array_equals_char() {
+    return generate_large_array_equals(2);
+  }
+
+  // a1 = r1 - array1 address
+  // a2 = r2 - array2 address
+  // result = r0 - return value; already contains "false"
+  // cnt1 = r4 - number of elements left to check, reduced by elem_per_word
+  address generate_large_array_equals(int elem_size) {
+    StubCodeMark mark(this, "StubRoutines", elem_size == 1
+        ? "large_array_equals_byte" : "large_array_equals_char");
+    Register a1 = r1, a2 = r2, result = r0, cnt1 = r4, tmp1 = rscratch1,
+        tmp2 = rscratch2, tmp3 = r6, tmp4 = r7;
+    Label LARGE_LOOP, NOT_EQUAL;
+    int elem_per_word = wordSize/elem_size;
+    int branchThreshold = MAX2(80, (int)SoftwarePrefetchHintDistance)/elem_size - elem_per_word;
+
+    assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2, tmp3, tmp4);
+
+    __ align(CodeEntryAlignment);
+    address entry = __ pc();
+    __ enter();
+
+    // pre-loop: load the first two words from each array
+    RegSet spilled_regs = RegSet::of(tmp3, tmp4);
+    __ push(spilled_regs, sp);
+    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
+    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
+    // main loop, unrolled to compare 64 bytes per iteration, with optional prefetching
+    __ bind(LARGE_LOOP);
+    if (SoftwarePrefetchHintDistance >= 0) {
+      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
+      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
+    }
+
+    __ eor(tmp1, tmp1, tmp2);
+    __ ldr(tmp2, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldr(tmp1, Address(__ post(a1, wordSize)));
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldr(tmp4, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp3, NOT_EQUAL);
+    __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+    __ eor(tmp1, tmp1, tmp2);
+    __ ldr(tmp2, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldr(tmp1, Address(__ post(a1, wordSize)));
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldr(tmp4, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp3, NOT_EQUAL);
+    __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+    __ eor(tmp1, tmp1, tmp2);
+    __ ldr(tmp2, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldr(tmp1, Address(__ post(a1, wordSize)));
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldr(tmp4, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp3, NOT_EQUAL);
+    __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+    // the loads below are for the next loop iteration
+    __ eor(tmp1, tmp1, tmp2);
+    __ ldr(tmp2, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ ldr(tmp1, Address(__ post(a1, wordSize)));
+    __ eor(tmp3, tmp3, tmp4);
+    __ ldr(tmp4, Address(__ post(a2, wordSize)));
+    __ cbnz(tmp3, NOT_EQUAL);
+    __ ldr(tmp3, Address(__ post(a1, wordSize)));
+
+    __ sub(cnt1, cnt1, 8 * elem_per_word);
+    // stay in this loop while there is memory left to prefetch (but at least 64+16 bytes)
+    __ cmp(cnt1, branchThreshold);
+    __ br(Assembler::GT, LARGE_LOOP);
+    // Both a1 and a2 have been advanced one wordSize further than needed, and
+    // tmp1-tmp4 still contain not-yet-checked values. Check them in this
+    // post-loop and update cnt1 accordingly.
+    __ eor(tmp1, tmp1, tmp2);
+    __ cbnz(tmp1, NOT_EQUAL);
+    __ eor(tmp3, tmp3, tmp4);
+    __ cbnz(tmp3, NOT_EQUAL);
+    __ sub(cnt1, cnt1, 2 * elem_per_word);
+    __ mov(result, true);
+    __ bind(NOT_EQUAL);
+    __ pop(spilled_regs, sp);
+    __ leave();
+    __ ret(lr);
+    return entry;
+  }
+
   /**
    * Arguments:
    *
@@ -4966,6 +5063,10 @@
     // has negatives stub for large arrays.
     StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
 
+    // array equals stub for large arrays.
+    StubRoutines::aarch64::_large_array_equals_byte = generate_large_array_equals_byte();
+    StubRoutines::aarch64::_large_array_equals_char = generate_large_array_equals_char();
+
     if (UseMultiplyToLenIntrinsic) {
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
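
Note, not part of the patch above: the stub compares the two arrays one 64-bit
word at a time, unrolled so that each LARGE_LOOP pass checks 64 bytes, with
eor + cbnz exiting on the first mismatching word. The standalone C++ sketch
below illustrates that comparison strategy only; the function name is
hypothetical, and the element-by-element tail is included just to make the
sketch self-contained (the real stub leaves leftover elements to the inline
code that calls it).

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Hypothetical illustration of the stub's word-at-a-time strategy; not
    // HotSpot code. memcpy is used for the loads so the sketch stays free of
    // unaligned-access undefined behavior.
    static bool large_array_equals_sketch(const uint8_t* a1, const uint8_t* a2,
                                          size_t len_in_bytes) {
      size_t i = 0;
      // Main loop: 8 words (64 bytes) per iteration, mirroring LARGE_LOOP.
      for (; i + 64 <= len_in_bytes; i += 64) {
        for (int w = 0; w < 8; w++) {
          uint64_t w1, w2;
          memcpy(&w1, a1 + i + 8 * w, sizeof w1);
          memcpy(&w2, a2 + i + 8 * w, sizeof w2);
          if ((w1 ^ w2) != 0) {  // analogue of the stub's eor + cbnz early exit
            return false;
          }
        }
      }
      // Tail: byte-by-byte, purely for completeness of this sketch.
      for (; i < len_in_bytes; i++) {
        if (a1[i] != a2[i]) return false;
      }
      return true;
    }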