< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page

        

// *** 3886,3895 **** --- 3886,4017 ----
      // (tail of the preceding stub, cut off by the diff hunk boundary)
      __ pop(spilled_regs, sp);
      __ leave();
      __ ret(lr);
      return entry;
  }

  // Entry point for byte[] comparison: one array element per byte.
  address generate_large_array_equals_byte() {
    return generate_large_array_equals(1);
  }

  // Entry point for char[] comparison: one array element per 16-bit char.
  address generate_large_array_equals_char() {
    return generate_large_array_equals(2);
  }

  // Generates a stub that compares two large arrays for equality.
  //
  // Register contract (per the author's note below):
  //   a1 = r1 - array1 address
  //   a2 = r2 - array2 address
  //   result = r0 - return value. Already contains "false"
  //   cnt1 = r4 - amount of elements left to check, reduced by elem_per_word
  //
  // elem_size is 1 (byte) or 2 (char); it only scales the element counts,
  // the comparison itself is done 64 bits (or 64 bytes, SIMD) at a time.
  // NOTE(review): the stub assumes the caller guarantees enough elements
  // remain for at least one 64-byte iteration — confirm against the
  // intrinsic expansion that calls this stub.
  address generate_large_array_equals(int elem_size) {
    StubCodeMark mark(this, "StubRoutines", elem_size == 1
        ? "large_array_equals_byte"
        : "large_array_equals_char");
    // tmp1/tmp2 are the scratch registers; tmp3/tmp4 must be spilled
    // before use (see spilled_regs below).
    Register a1 = r1, a2 = r2, result = r0, cnt1 = r4, tmp1 = rscratch1,
        tmp2 = rscratch2, tmp3 = r6, tmp4 = r7;
    Label LARGE_LOOP, NOT_EQUAL;
    int elem_per_word = wordSize/elem_size;
    // Loop while cnt1 > branchThreshold: keeps enough unread elements ahead
    // that the prfm prefetches below stay within the arrays (at least 80
    // bytes' worth of elements, minus the word pre-loaded by the pre-loop).
    // NOTE(review): branchThreshold is passed as a cmp() immediate — confirm
    // it is always encodable as an arithmetic immediate for the allowed
    // range of SoftwarePrefetchHintDistance, since the fallback path of the
    // cmp() wrapper would need a scratch register (tmp2/rscratch2 is live
    // in the non-SIMD loop).
    int branchThreshold = MAX(80, SoftwarePrefetchHintDistance)/elem_size - elem_per_word;
    RegSet spilled_regs = RegSet::of(tmp3, tmp4);

    assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2, tmp3, tmp4);

    __ align(CodeEntryAlignment);
    address entry = __ pc();
    __ enter();

    if (!UseSIMDForArrayEquals) {
      // pre-loop: prime the software pipeline — load the first word pair of
      // each array so every loop iteration can overlap its compare with the
      // loads for the next step.
      __ push(spilled_regs, sp);
      __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
      __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
    }
    __ bind(LARGE_LOOP); // unrolled to 64 bytes loop with possible prefetching
    if (SoftwarePrefetchHintDistance >= 0) {
      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
    }
    if (UseSIMDForArrayEquals) {
      // SIMD path: compare 64 bytes per iteration. XOR the four 16-byte
      // register pairs, then OR-reduce the four differences into v0; any
      // non-zero bit in v0 means the arrays differ.
      __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
      __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
      __ eor(v0, __ T2D, v0, v4);
      __ eor(v1, __ T2D, v1, v5);
      __ eor(v2, __ T2D, v2, v6);
      __ eor(v3, __ T2D, v3, v7);

      __ orr(v0, __ T2D, v0, v1);
      __ orr(v1, __ T2D, v2, v3);
      __ orr(v0, __ T2D, v0, v1);

      // Extract both 64-bit halves of the reduced difference and test them.
      __ umov(tmp1, v0, __ D, 0);
      __ cbnz(tmp1, NOT_EQUAL);
      __ umov(tmp1, v0, __ D, 1);
      __ cbnz(tmp1, NOT_EQUAL);
      __ sub(cnt1, cnt1, 64/elem_size);
      __ cmp(cnt1, branchThreshold);
      __ br(__ GT, LARGE_LOOP);
    } else {
      // Scalar path: 8 words (64 bytes) per iteration, software-pipelined —
      // each compare uses values loaded on the previous step while issuing
      // the loads for the next one.
      __ eor(tmp1, tmp1, tmp2);
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
      __ cbnz(tmp1, NOT_EQUAL);
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
      __ eor(tmp3, tmp3, tmp4);
      __ ldr(tmp4, Address(__ post(a2, wordSize)));
      __ cbnz(tmp3, NOT_EQUAL);
      __ ldr(tmp3, Address(__ post(a1, wordSize)));

      __ eor(tmp1, tmp1, tmp2);
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
      __ cbnz(tmp1, NOT_EQUAL);
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
      __ eor(tmp3, tmp3, tmp4);
      __ ldr(tmp4, Address(__ post(a2, wordSize)));
      __ cbnz(tmp3, NOT_EQUAL);
      __ ldr(tmp3, Address(__ post(a1, wordSize)));

      __ eor(tmp1, tmp1, tmp2);
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
      __ cbnz(tmp1, NOT_EQUAL);
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
      __ eor(tmp3, tmp3, tmp4);
      __ ldr(tmp4, Address(__ post(a2, wordSize)));
      __ cbnz(tmp3, NOT_EQUAL);
      __ ldr(tmp3, Address(__ post(a1, wordSize)));

      // loads below are for next loop iteration
      __ eor(tmp1, tmp1, tmp2);
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
      __ cbnz(tmp1, NOT_EQUAL);
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
      __ eor(tmp3, tmp3, tmp4);
      __ ldr(tmp4, Address(__ post(a2, wordSize)));
      __ cbnz(tmp3, NOT_EQUAL);
      __ ldr(tmp3, Address(__ post(a1, wordSize)));

      __ sub(cnt1, cnt1, 8 * elem_per_word);
      // run this loop until we have memory to prefetch(but at least 64+16 bytes).
      __ cmp(cnt1, branchThreshold);
      __ br(Assembler::GT, LARGE_LOOP);
      // both a1 and a2 are shifted more than needed by wordSize and tmp1-tmp4
      // contains still-not-checked value. Check it in this post-loop, also update
      // cnt1 accordingly
      __ eor(tmp1, tmp1, tmp2);
      __ cbnz(tmp1, NOT_EQUAL);
      __ eor(tmp3, tmp3, tmp4);
      __ cbnz(tmp3, NOT_EQUAL);
      __ sub(cnt1, cnt1, 2 * elem_per_word);
    }

    // Fall-through: no difference found so far; set result to true. On any
    // mismatch the cbnz branches land directly on NOT_EQUAL, skipping this
    // mov so result keeps the "false" it arrived with (see contract above).
    __ mov(result, true);
    __ bind(NOT_EQUAL);
    if (!UseSIMDForArrayEquals) {
      // tmp3/tmp4 were only pushed on the scalar path.
      __ pop(spilled_regs, sp);
    }
    __ leave();
    __ ret(lr);
    return entry;
  }

  /**
   * Arguments:
   *
   * Input:
   *   c_rarg0   - current state address
*** 4968,4977 **** --- 5090,5103 ---- generate_arraycopy_stubs(); // has negatives stub for large arrays. StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long); + // array equals stub for large arrays. + StubRoutines::aarch64::_large_array_equals_byte = generate_large_array_equals_byte(); + StubRoutines::aarch64::_large_array_equals_char = generate_large_array_equals_char(); + if (UseMultiplyToLenIntrinsic) { StubRoutines::_multiplyToLen = generate_multiplyToLen(); } if (UseSquareToLenIntrinsic) {
< prev index next >