< prev index next >
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Print this page
*** 3849,3861 ****
__ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
__ eor(tmp5, tmp5, tmp6);
__ sub(cnt1, cnt1, 8 * wordSize);
__ eor(tmp7, tmp7, tmp8);
__ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
! // tmp6 is not used. MacroAssembler::subs is used here (rather than
! // cmp) because subs allows an unlimited range of immediate operand.
! __ subs(tmp6, cnt1, loopThreshold);
__ orr(tmp5, tmp5, tmp7);
__ cbnz(tmp5, NOT_EQUAL);
__ br(__ GE, LOOP);
// post-loop
__ eor(tmp1, tmp1, tmp2);
--- 3849,3859 ----
__ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
__ eor(tmp5, tmp5, tmp6);
__ sub(cnt1, cnt1, 8 * wordSize);
__ eor(tmp7, tmp7, tmp8);
__ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
! __ cmp(tmp6, cnt1, loopThreshold);
__ orr(tmp5, tmp5, tmp7);
__ cbnz(tmp5, NOT_EQUAL);
__ br(__ GE, LOOP);
// post-loop
__ eor(tmp1, tmp1, tmp2);
*** 3877,3887 ****
__ prfm(Address(a2, SoftwarePrefetchHintDistance));
}
__ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
__ sub(cnt1, cnt1, 8 * wordSize);
__ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
! __ subs(tmp1, cnt1, loopThreshold);
__ eor(v0, __ T16B, v0, v4);
__ eor(v1, __ T16B, v1, v5);
__ eor(v2, __ T16B, v2, v6);
__ eor(v3, __ T16B, v3, v7);
__ orr(v0, __ T16B, v0, v1);
--- 3875,3885 ----
__ prfm(Address(a2, SoftwarePrefetchHintDistance));
}
__ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
__ sub(cnt1, cnt1, 8 * wordSize);
__ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
! __ cmp(tmp1, cnt1, loopThreshold);
__ eor(v0, __ T16B, v0, v4);
__ eor(v1, __ T16B, v1, v5);
__ eor(v2, __ T16B, v2, v6);
__ eor(v3, __ T16B, v3, v7);
__ orr(v0, __ T16B, v0, v1);
*** 3936,3946 ****
__ cbnz(tmp1, NOT_EQUAL_NO_POP);
__ bind(ALIGNED16);
}
if (UseSIMDForArrayEquals) {
if (SoftwarePrefetchHintDistance >= 0) {
! __ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
__ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
--- 3934,3944 ----
__ cbnz(tmp1, NOT_EQUAL_NO_POP);
__ bind(ALIGNED16);
}
if (UseSIMDForArrayEquals) {
if (SoftwarePrefetchHintDistance >= 0) {
! __ cmp(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
__ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
*** 3949,3959 ****
generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
} else {
__ push(spilled_regs, sp);
if (SoftwarePrefetchHintDistance >= 0) {
! __ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
__ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
--- 3947,3957 ----
generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
} else {
__ push(spilled_regs, sp);
if (SoftwarePrefetchHintDistance >= 0) {
! __ cmp(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
__ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
*** 4104,4114 ****
__ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
__ ldr(tmp3, Address(__ post(cnt1, 8)));
if (SoftwarePrefetchHintDistance >= 0) {
! __ cmp(cnt2, prefetchLoopExitCondition);
__ br(__ LT, SMALL_LOOP);
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
__ mov(tmp4, 2);
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
--- 4102,4112 ----
__ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
__ ldr(tmp3, Address(__ post(cnt1, 8)));
if (SoftwarePrefetchHintDistance >= 0) {
! __ cmp(rscratch2, cnt2, prefetchLoopExitCondition);
__ br(__ LT, SMALL_LOOP);
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
__ mov(tmp4, 2);
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
*** 4121,4131 ****
__ bind(LARGE_LOOP_PREFETCH_REPEAT2);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ subs(tmp4, tmp4, 1);
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
__ sub(cnt2, cnt2, 64);
! __ cmp(cnt2, prefetchLoopExitCondition);
__ br(__ GE, LARGE_LOOP_PREFETCH);
}
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
--- 4119,4129 ----
__ bind(LARGE_LOOP_PREFETCH_REPEAT2);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ subs(tmp4, tmp4, 1);
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
__ sub(cnt2, cnt2, 64);
! __ cmp(rscratch2, cnt2, prefetchLoopExitCondition);
__ br(__ GE, LARGE_LOOP_PREFETCH);
}
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
*** 4238,4248 ****
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
! __ cmp(cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
--- 4236,4246 ----
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
! __ cmp(rscratch2, cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
*** 4657,4675 ****
__ sub(octetCounter, octetCounter, 2);
__ zip1(v1, __ T16B, v1, v0);
__ zip1(v2, __ T16B, v2, v0);
__ st1(v1, v2, __ T16B, __ post(dst, 32));
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
! __ cmp(octetCounter, large_loop_threshold);
__ br(__ LE, LOOP_START);
__ b(LOOP_PRFM_START);
__ bind(LOOP_PRFM);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_PRFM_START);
__ prfm(Address(src, SoftwarePrefetchHintDistance));
__ sub(octetCounter, octetCounter, 8);
! __ cmp(octetCounter, large_loop_threshold);
inflate_and_store_2_fp_registers(true, v3, v4);
inflate_and_store_2_fp_registers(true, v5, v6);
__ br(__ GT, LOOP_PRFM);
__ cmp(octetCounter, 8);
__ br(__ LT, DONE);
--- 4655,4673 ----
__ sub(octetCounter, octetCounter, 2);
__ zip1(v1, __ T16B, v1, v0);
__ zip1(v2, __ T16B, v2, v0);
__ st1(v1, v2, __ T16B, __ post(dst, 32));
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
! __ cmp(rscratch1, octetCounter, large_loop_threshold);
__ br(__ LE, LOOP_START);
__ b(LOOP_PRFM_START);
__ bind(LOOP_PRFM);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_PRFM_START);
__ prfm(Address(src, SoftwarePrefetchHintDistance));
__ sub(octetCounter, octetCounter, 8);
! __ cmp(rscratch1, octetCounter, large_loop_threshold);
inflate_and_store_2_fp_registers(true, v3, v4);
inflate_and_store_2_fp_registers(true, v5, v6);
__ br(__ GT, LOOP_PRFM);
__ cmp(octetCounter, 8);
__ br(__ LT, DONE);
< prev index next >