< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page

        

*** 3849,3861 **** __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize))); __ eor(tmp5, tmp5, tmp6); __ sub(cnt1, cnt1, 8 * wordSize); __ eor(tmp7, tmp7, tmp8); __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize))); ! // tmp6 is not used. MacroAssembler::subs is used here (rather than ! // cmp) because subs allows an unlimited range of immediate operand. ! __ subs(tmp6, cnt1, loopThreshold); __ orr(tmp5, tmp5, tmp7); __ cbnz(tmp5, NOT_EQUAL); __ br(__ GE, LOOP); // post-loop __ eor(tmp1, tmp1, tmp2); --- 3849,3859 ---- __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize))); __ eor(tmp5, tmp5, tmp6); __ sub(cnt1, cnt1, 8 * wordSize); __ eor(tmp7, tmp7, tmp8); __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize))); ! __ cmp(tmp6, cnt1, loopThreshold); __ orr(tmp5, tmp5, tmp7); __ cbnz(tmp5, NOT_EQUAL); __ br(__ GE, LOOP); // post-loop __ eor(tmp1, tmp1, tmp2);
*** 3877,3887 **** __ prfm(Address(a2, SoftwarePrefetchHintDistance)); } __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize))); __ sub(cnt1, cnt1, 8 * wordSize); __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize))); ! __ subs(tmp1, cnt1, loopThreshold); __ eor(v0, __ T16B, v0, v4); __ eor(v1, __ T16B, v1, v5); __ eor(v2, __ T16B, v2, v6); __ eor(v3, __ T16B, v3, v7); __ orr(v0, __ T16B, v0, v1); --- 3875,3885 ---- __ prfm(Address(a2, SoftwarePrefetchHintDistance)); } __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize))); __ sub(cnt1, cnt1, 8 * wordSize); __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize))); ! __ cmp(tmp1, cnt1, loopThreshold); __ eor(v0, __ T16B, v0, v4); __ eor(v1, __ T16B, v1, v5); __ eor(v2, __ T16B, v2, v6); __ eor(v3, __ T16B, v3, v7); __ orr(v0, __ T16B, v0, v1);
*** 3936,3946 **** __ cbnz(tmp1, NOT_EQUAL_NO_POP); __ bind(ALIGNED16); } if (UseSIMDForArrayEquals) { if (SoftwarePrefetchHintDistance >= 0) { ! __ subs(tmp1, cnt1, prefetchLoopThreshold); __ br(__ LE, NO_PREFETCH_LARGE_LOOP); generate_large_array_equals_loop_simd(prefetchLoopThreshold, /* prfm = */ true, NOT_EQUAL); __ cmp(cnt1, nonPrefetchLoopThreshold); __ br(__ LT, TAIL); --- 3934,3944 ---- __ cbnz(tmp1, NOT_EQUAL_NO_POP); __ bind(ALIGNED16); } if (UseSIMDForArrayEquals) { if (SoftwarePrefetchHintDistance >= 0) { ! __ cmp(tmp1, cnt1, prefetchLoopThreshold); __ br(__ LE, NO_PREFETCH_LARGE_LOOP); generate_large_array_equals_loop_simd(prefetchLoopThreshold, /* prfm = */ true, NOT_EQUAL); __ cmp(cnt1, nonPrefetchLoopThreshold); __ br(__ LT, TAIL);
*** 3949,3959 **** generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold, /* prfm = */ false, NOT_EQUAL); } else { __ push(spilled_regs, sp); if (SoftwarePrefetchHintDistance >= 0) { ! __ subs(tmp1, cnt1, prefetchLoopThreshold); __ br(__ LE, NO_PREFETCH_LARGE_LOOP); generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold, /* prfm = */ true, NOT_EQUAL); __ cmp(cnt1, nonPrefetchLoopThreshold); __ br(__ LT, TAIL); --- 3947,3957 ---- generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold, /* prfm = */ false, NOT_EQUAL); } else { __ push(spilled_regs, sp); if (SoftwarePrefetchHintDistance >= 0) { ! __ cmp(tmp1, cnt1, prefetchLoopThreshold); __ br(__ LE, NO_PREFETCH_LARGE_LOOP); generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold, /* prfm = */ true, NOT_EQUAL); __ cmp(cnt1, nonPrefetchLoopThreshold); __ br(__ LT, TAIL);
*** 4104,4114 **** __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from __ ldr(tmp3, Address(__ post(cnt1, 8))); if (SoftwarePrefetchHintDistance >= 0) { ! __ cmp(cnt2, prefetchLoopExitCondition); __ br(__ LT, SMALL_LOOP); __ bind(LARGE_LOOP_PREFETCH); __ prfm(Address(tmp2, SoftwarePrefetchHintDistance)); __ mov(tmp4, 2); __ prfm(Address(cnt1, SoftwarePrefetchHintDistance)); --- 4102,4112 ---- __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from __ ldr(tmp3, Address(__ post(cnt1, 8))); if (SoftwarePrefetchHintDistance >= 0) { ! __ cmp(rscratch2, cnt2, prefetchLoopExitCondition); __ br(__ LT, SMALL_LOOP); __ bind(LARGE_LOOP_PREFETCH); __ prfm(Address(tmp2, SoftwarePrefetchHintDistance)); __ mov(tmp4, 2); __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
*** 4121,4131 **** __ bind(LARGE_LOOP_PREFETCH_REPEAT2); compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); __ subs(tmp4, tmp4, 1); __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2); __ sub(cnt2, cnt2, 64); ! __ cmp(cnt2, prefetchLoopExitCondition); __ br(__ GE, LARGE_LOOP_PREFETCH); } __ cbz(cnt2, LOAD_LAST); // no characters left except last load __ subs(cnt2, cnt2, 16); __ br(__ LT, TAIL); --- 4119,4129 ---- __ bind(LARGE_LOOP_PREFETCH_REPEAT2); compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2); __ subs(tmp4, tmp4, 1); __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2); __ sub(cnt2, cnt2, 64); ! __ cmp(rscratch2, cnt2, prefetchLoopExitCondition); __ br(__ GE, LARGE_LOOP_PREFETCH); } __ cbz(cnt2, LOAD_LAST); // no characters left except last load __ subs(cnt2, cnt2, 16); __ br(__ LT, TAIL);
*** 4238,4248 **** __ prfm(Address(str2, SoftwarePrefetchHintDistance)); compare_string_16_bytes_same(DIFF, DIFF2); compare_string_16_bytes_same(DIFF, DIFF2); __ sub(cnt2, cnt2, isLL ? 64 : 32); compare_string_16_bytes_same(DIFF, DIFF2); ! __ cmp(cnt2, largeLoopExitCondition); compare_string_16_bytes_same(DIFF, DIFF2); __ br(__ GT, LARGE_LOOP_PREFETCH); __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left? // less than 16 bytes left? __ subs(cnt2, cnt2, isLL ? 16 : 8); --- 4236,4246 ---- __ prfm(Address(str2, SoftwarePrefetchHintDistance)); compare_string_16_bytes_same(DIFF, DIFF2); compare_string_16_bytes_same(DIFF, DIFF2); __ sub(cnt2, cnt2, isLL ? 64 : 32); compare_string_16_bytes_same(DIFF, DIFF2); ! __ cmp(rscratch2, cnt2, largeLoopExitCondition); compare_string_16_bytes_same(DIFF, DIFF2); __ br(__ GT, LARGE_LOOP_PREFETCH); __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left? // less than 16 bytes left? __ subs(cnt2, cnt2, isLL ? 16 : 8);
*** 4657,4675 **** __ sub(octetCounter, octetCounter, 2); __ zip1(v1, __ T16B, v1, v0); __ zip1(v2, __ T16B, v2, v0); __ st1(v1, v2, __ T16B, __ post(dst, 32)); __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64))); ! __ cmp(octetCounter, large_loop_threshold); __ br(__ LE, LOOP_START); __ b(LOOP_PRFM_START); __ bind(LOOP_PRFM); __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64))); __ bind(LOOP_PRFM_START); __ prfm(Address(src, SoftwarePrefetchHintDistance)); __ sub(octetCounter, octetCounter, 8); ! __ cmp(octetCounter, large_loop_threshold); inflate_and_store_2_fp_registers(true, v3, v4); inflate_and_store_2_fp_registers(true, v5, v6); __ br(__ GT, LOOP_PRFM); __ cmp(octetCounter, 8); __ br(__ LT, DONE); --- 4655,4673 ---- __ sub(octetCounter, octetCounter, 2); __ zip1(v1, __ T16B, v1, v0); __ zip1(v2, __ T16B, v2, v0); __ st1(v1, v2, __ T16B, __ post(dst, 32)); __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64))); ! __ cmp(rscratch1, octetCounter, large_loop_threshold); __ br(__ LE, LOOP_START); __ b(LOOP_PRFM_START); __ bind(LOOP_PRFM); __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64))); __ bind(LOOP_PRFM_START); __ prfm(Address(src, SoftwarePrefetchHintDistance)); __ sub(octetCounter, octetCounter, 8); ! __ cmp(rscratch1, octetCounter, large_loop_threshold); inflate_and_store_2_fp_registers(true, v3, v4); inflate_and_store_2_fp_registers(true, v5, v6); __ br(__ GT, LOOP_PRFM); __ cmp(octetCounter, 8); __ br(__ LT, DONE);
< prev index next >