
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp


*** 263,273 ****
  #ifdef ASSERT
      // make sure we have no pending exceptions
      {
        Label L;
        __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
!       __ cmp(rscratch1, (unsigned)NULL_WORD);
        __ br(Assembler::EQ, L);
        __ stop("StubRoutines::call_stub: entered with pending exception");
        __ BIND(L);
      }
  #endif
--- 263,273 ----
  #ifdef ASSERT
      // make sure we have no pending exceptions
      {
        Label L;
        __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
!       __ cmp(rscratch1, (u1)NULL_WORD);
        __ br(Assembler::EQ, L);
        __ stop("StubRoutines::call_stub: entered with pending exception");
        __ BIND(L);
      }
  #endif
*** 320,336 ****
      // n.b. this assumes Java returns an integral result in r0
      // and a floating result in j_farg0
      __ ldr(j_rarg2, result);
      Label is_long, is_float, is_double, exit;
      __ ldr(j_rarg1, result_type);
!     __ cmp(j_rarg1, T_OBJECT);
      __ br(Assembler::EQ, is_long);
!     __ cmp(j_rarg1, T_LONG);
      __ br(Assembler::EQ, is_long);
!     __ cmp(j_rarg1, T_FLOAT);
      __ br(Assembler::EQ, is_float);
!     __ cmp(j_rarg1, T_DOUBLE);
      __ br(Assembler::EQ, is_double);

      // handle T_INT case
      __ strw(r0, Address(j_rarg2));
--- 320,336 ----
      // n.b. this assumes Java returns an integral result in r0
      // and a floating result in j_farg0
      __ ldr(j_rarg2, result);
      Label is_long, is_float, is_double, exit;
      __ ldr(j_rarg1, result_type);
!     __ cmp(j_rarg1, (u1)T_OBJECT);
      __ br(Assembler::EQ, is_long);
!     __ cmp(j_rarg1, (u1)T_LONG);
      __ br(Assembler::EQ, is_long);
!     __ cmp(j_rarg1, (u1)T_FLOAT);
      __ br(Assembler::EQ, is_float);
!     __ cmp(j_rarg1, (u1)T_DOUBLE);
      __ br(Assembler::EQ, is_double);

      // handle T_INT case
      __ strw(r0, Address(j_rarg2));
*** 741,751 ****
  #ifdef ASSERT
      // Make sure we are never given < 8 words
      {
        Label L;
!       __ cmp(count, 8);
        __ br(Assembler::GE, L);
        __ stop("genrate_copy_longs called with < 8 words");
        __ bind(L);
      }
  #endif
--- 741,751 ----
  #ifdef ASSERT
      // Make sure we are never given < 8 words
      {
        Label L;
!       __ cmp(count, (u1)8);
        __ br(Assembler::GE, L);
        __ stop("genrate_copy_longs called with < 8 words");
        __ bind(L);
      }
  #endif
*** 1101,1123 ****
      const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
      const Register send = r17, dend = r18;

      if (PrefetchCopyIntervalInBytes > 0)
        __ prfm(Address(s, 0), PLDL1KEEP);
!     __ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
      __ br(Assembler::HI, copy_big);

      __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
      __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));

!     __ cmp(count, 16/granularity);
      __ br(Assembler::LS, copy16);

!     __ cmp(count, 64/granularity);
      __ br(Assembler::HI, copy80);

!     __ cmp(count, 32/granularity);
      __ br(Assembler::LS, copy32);

      // 33..64 bytes
      if (UseSIMDForMemoryOps) {
        __ ldpq(v0, v1, Address(s, 0));
--- 1101,1123 ----
      const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
      const Register send = r17, dend = r18;

      if (PrefetchCopyIntervalInBytes > 0)
        __ prfm(Address(s, 0), PLDL1KEEP);
!     __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
      __ br(Assembler::HI, copy_big);

      __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
      __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));

!     __ cmp(count, u1(16/granularity));
      __ br(Assembler::LS, copy16);

!     __ cmp(count, u1(64/granularity));
      __ br(Assembler::HI, copy80);

!     __ cmp(count, u1(32/granularity));
      __ br(Assembler::LS, copy32);

      // 33..64 bytes
      if (UseSIMDForMemoryOps) {
        __ ldpq(v0, v1, Address(s, 0));
*** 1168,1178 ****
      }
      __ b(finish);

      // 0..16 bytes
      __ bind(copy16);
!     __ cmp(count, 8/granularity);
      __ br(Assembler::LO, copy8);

      // 8..16 bytes
      __ ldr(t0, Address(s, 0));
      __ ldr(t1, Address(send, -8));
--- 1168,1178 ----
      }
      __ b(finish);

      // 0..16 bytes
      __ bind(copy16);
!     __ cmp(count, u1(8/granularity));
      __ br(Assembler::LO, copy8);

      // 8..16 bytes
      __ ldr(t0, Address(s, 0));
      __ ldr(t1, Address(send, -8));
*** 3268,3278 ****
      __ ubfx(s2, adler, 16, 16);  // s2 = ((adler >> 16) & 0xffff)
      __ uxth(s1, adler);          // s1 = (adler & 0xffff)

      // The pipelined loop needs at least 16 elements for 1 iteration
      // It does check this, but it is more effective to skip to the cleanup loop
!     __ cmp(len, 16);
      __ br(Assembler::HS, L_nmax);
      __ cbz(len, L_combine);

      __ bind(L_simple_by1_loop);
      __ ldrb(temp0, Address(__ post(buff, 1)));
--- 3268,3278 ----
      __ ubfx(s2, adler, 16, 16);  // s2 = ((adler >> 16) & 0xffff)
      __ uxth(s1, adler);          // s1 = (adler & 0xffff)

      // The pipelined loop needs at least 16 elements for 1 iteration
      // It does check this, but it is more effective to skip to the cleanup loop
!     __ cmp(len, (u1)16);
      __ br(Assembler::HS, L_nmax);
      __ cbz(len, L_combine);

      __ bind(L_simple_by1_loop);
      __ ldrb(temp0, Address(__ post(buff, 1)));
*** 3652,3662 ****
      __ eor(result, __ T16B, lo, t0);
    }

    address generate_has_negatives(address &has_negatives_long) {
      StubCodeMark mark(this, "StubRoutines", "has_negatives");
!     const int large_loop_size = 64;
      const uint64_t UPPER_BIT_MASK=0x8080808080808080;
      int dcache_line = VM_Version::dcache_line_size();

      Register ary1 = r1, len = r2, result = r0;
--- 3652,3662 ----
      __ eor(result, __ T16B, lo, t0);
    }

    address generate_has_negatives(address &has_negatives_long) {
      StubCodeMark mark(this, "StubRoutines", "has_negatives");
!     const u1 large_loop_size = 64;
      const uint64_t UPPER_BIT_MASK=0x8080808080808080;
      int dcache_line = VM_Version::dcache_line_size();

      Register ary1 = r1, len = r2, result = r0;
*** 3666,3676 ****
      __ enter();

      Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
          LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;

!     __ cmp(len, 15);
      __ br(Assembler::GT, LEN_OVER_15);
      // The only case when execution falls into this code is when pointer is near
      // the end of memory page and we have to avoid reading next page
      __ add(ary1, ary1, len);
      __ subs(len, len, 8);
--- 3666,3676 ----
      __ enter();

      Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
          LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;

!     __ cmp(len, (u1)15);
      __ br(Assembler::GT, LEN_OVER_15);
      // The only case when execution falls into this code is when pointer is near
      // the end of memory page and we have to avoid reading next page
      __ add(ary1, ary1, len);
      __ subs(len, len, 8);
*** 3762,3785 ****
        __ br(Assembler::NE, RET_TRUE);
        __ cmp(len, large_loop_size);
        __ br(Assembler::GE, LARGE_LOOP);

      __ bind(CHECK_16); // small 16-byte load pre-loop
!       __ cmp(len, 16);
        __ br(Assembler::LT, POST_LOOP16);

      __ bind(LOOP16); // small 16-byte load loop
        __ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
        __ sub(len, len, 16);
        __ orr(tmp2, tmp2, tmp3);
        __ tst(tmp2, UPPER_BIT_MASK);
        __ br(Assembler::NE, RET_TRUE);
!       __ cmp(len, 16);
        __ br(Assembler::GE, LOOP16); // 16-byte load loop end

      __ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
!       __ cmp(len, 8);
        __ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
        __ ldr(tmp3, Address(__ post(ary1, 8)));
        __ sub(len, len, 8);
        __ tst(tmp3, UPPER_BIT_MASK);
        __ br(Assembler::NE, RET_TRUE);
--- 3762,3785 ----
        __ br(Assembler::NE, RET_TRUE);
        __ cmp(len, large_loop_size);
        __ br(Assembler::GE, LARGE_LOOP);

      __ bind(CHECK_16); // small 16-byte load pre-loop
!       __ cmp(len, (u1)16);
        __ br(Assembler::LT, POST_LOOP16);

      __ bind(LOOP16); // small 16-byte load loop
        __ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
        __ sub(len, len, 16);
        __ orr(tmp2, tmp2, tmp3);
        __ tst(tmp2, UPPER_BIT_MASK);
        __ br(Assembler::NE, RET_TRUE);
!       __ cmp(len, (u1)16);
        __ br(Assembler::GE, LOOP16); // 16-byte load loop end

      __ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
!       __ cmp(len, (u1)8);
        __ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
        __ ldr(tmp3, Address(__ post(ary1, 8)));
        __ sub(len, len, 8);
        __ tst(tmp3, UPPER_BIT_MASK);
        __ br(Assembler::NE, RET_TRUE);
*** 3940,3950 ****
      if (SoftwarePrefetchHintDistance >= 0) {
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
        generate_large_array_equals_loop_simd(prefetchLoopThreshold,
            /* prfm = */ true, NOT_EQUAL);
!       __ cmp(cnt1, nonPrefetchLoopThreshold);
        __ br(__ LT, TAIL);
      }
      __ bind(NO_PREFETCH_LARGE_LOOP);
      generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
          /* prfm = */ false, NOT_EQUAL);
--- 3940,3950 ----
      if (SoftwarePrefetchHintDistance >= 0) {
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
        generate_large_array_equals_loop_simd(prefetchLoopThreshold,
            /* prfm = */ true, NOT_EQUAL);
!       __ subs(zr, cnt1, nonPrefetchLoopThreshold);
        __ br(__ LT, TAIL);
      }
      __ bind(NO_PREFETCH_LARGE_LOOP);
      generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
          /* prfm = */ false, NOT_EQUAL);
*** 3953,3963 ****
      if (SoftwarePrefetchHintDistance >= 0) {
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
        generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
            /* prfm = */ true, NOT_EQUAL);
!       __ cmp(cnt1, nonPrefetchLoopThreshold);
        __ br(__ LT, TAIL);
      }
      __ bind(NO_PREFETCH_LARGE_LOOP);
      generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
          /* prfm = */ false, NOT_EQUAL);
--- 3953,3963 ----
      if (SoftwarePrefetchHintDistance >= 0) {
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
        generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
            /* prfm = */ true, NOT_EQUAL);
!       __ subs(zr, cnt1, nonPrefetchLoopThreshold);
        __ br(__ LT, TAIL);
      }
      __ bind(NO_PREFETCH_LARGE_LOOP);
      generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
          /* prfm = */ false, NOT_EQUAL);
*** 4104,4114 ****
      __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
      __ ldr(tmp3, Address(__ post(cnt1, 8)));

      if (SoftwarePrefetchHintDistance >= 0) {
!       __ cmp(cnt2, prefetchLoopExitCondition);
        __ br(__ LT, SMALL_LOOP);
        __ bind(LARGE_LOOP_PREFETCH);
          __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
          __ mov(tmp4, 2);
          __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
--- 4104,4114 ----
      __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
      __ ldr(tmp3, Address(__ post(cnt1, 8)));

      if (SoftwarePrefetchHintDistance >= 0) {
!       __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
        __ br(__ LT, SMALL_LOOP);
        __ bind(LARGE_LOOP_PREFETCH);
          __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
          __ mov(tmp4, 2);
          __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
*** 4121,4131 ****
        __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
          compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
          __ subs(tmp4, tmp4, 1);
          __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
          __ sub(cnt2, cnt2, 64);
!         __ cmp(cnt2, prefetchLoopExitCondition);
          __ br(__ GE, LARGE_LOOP_PREFETCH);
      }
      __ cbz(cnt2, LOAD_LAST); // no characters left except last load
      __ subs(cnt2, cnt2, 16);
      __ br(__ LT, TAIL);
--- 4121,4131 ----
        __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
          compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
          __ subs(tmp4, tmp4, 1);
          __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
          __ sub(cnt2, cnt2, 64);
!         __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
          __ br(__ GE, LARGE_LOOP_PREFETCH);
      }
      __ cbz(cnt2, LOAD_LAST); // no characters left except last load
      __ subs(cnt2, cnt2, 16);
      __ br(__ LT, TAIL);
*** 4135,4145 ****
      __ bind(SMALL_LOOP_ENTER);
        compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
        __ br(__ GE, SMALL_LOOP);
        __ cbz(cnt2, LOAD_LAST);

      __ bind(TAIL); // 1..15 characters left
!       __ cmp(cnt2, -8);
        __ br(__ GT, TAIL_LOAD_16);
        __ ldrd(vtmp, Address(tmp2));
        __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
        __ ldr(tmpU, Address(__ post(cnt1, 8)));
--- 4135,4145 ----
      __ bind(SMALL_LOOP_ENTER);
        compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
        __ br(__ GE, SMALL_LOOP);
        __ cbz(cnt2, LOAD_LAST);

      __ bind(TAIL); // 1..15 characters left
!       __ subs(zr, cnt2, -8);
        __ br(__ GT, TAIL_LOAD_16);
        __ ldrd(vtmp, Address(tmp2));
        __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
        __ ldr(tmpU, Address(__ post(cnt1, 8)));
*** 4238,4248 ****
          __ prfm(Address(str2, SoftwarePrefetchHintDistance));
          compare_string_16_bytes_same(DIFF, DIFF2);
          compare_string_16_bytes_same(DIFF, DIFF2);
          __ sub(cnt2, cnt2, isLL ? 64 : 32);
          compare_string_16_bytes_same(DIFF, DIFF2);
!         __ cmp(cnt2, largeLoopExitCondition);
          compare_string_16_bytes_same(DIFF, DIFF2);
          __ br(__ GT, LARGE_LOOP_PREFETCH);
          __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
          // less than 16 bytes left?
          __ subs(cnt2, cnt2, isLL ? 16 : 8);
--- 4238,4248 ----
          __ prfm(Address(str2, SoftwarePrefetchHintDistance));
          compare_string_16_bytes_same(DIFF, DIFF2);
          compare_string_16_bytes_same(DIFF, DIFF2);
          __ sub(cnt2, cnt2, isLL ? 64 : 32);
          compare_string_16_bytes_same(DIFF, DIFF2);
!         __ subs(rscratch2, cnt2, largeLoopExitCondition);
          compare_string_16_bytes_same(DIFF, DIFF2);
          __ br(__ GT, LARGE_LOOP_PREFETCH);
          __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
          // less than 16 bytes left?
          __ subs(cnt2, cnt2, isLL ? 16 : 8);
*** 4414,4424 ****
        __ subs(cnt2, cnt2, wordSize/str2_chr_size);
        __ add(str2, str2, wordSize);
        __ add(result, result, wordSize/str2_chr_size);
        __ br(__ GE, L_LOOP);
      __ BIND(L_POST_LOOP);
!       __ cmp(cnt2, -wordSize/str2_chr_size); // no extra characters to check
        __ br(__ LE, NOMATCH);
        __ ldr(ch2, Address(str2));
        __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
        __ eor(ch2, first, ch2);
        __ sub(tmp2, ch2, tmp1);
--- 4414,4424 ----
        __ subs(cnt2, cnt2, wordSize/str2_chr_size);
        __ add(str2, str2, wordSize);
        __ add(result, result, wordSize/str2_chr_size);
        __ br(__ GE, L_LOOP);
      __ BIND(L_POST_LOOP);
!       __ subs(zr, cnt2, -wordSize/str2_chr_size); // no extra characters to check
        __ br(__ LE, NOMATCH);
        __ ldr(ch2, Address(str2));
        __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
        __ eor(ch2, first, ch2);
        __ sub(tmp2, ch2, tmp1);
*** 4444,4454 ****
        __ ands(tmp2, tmp2, tmp4); // clear useless bits and check
        __ rbit(tmp2, tmp2);
        __ br(__ EQ, NOMATCH);
      __ BIND(L_SMALL_HAS_ZERO_LOOP);
        __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
!       __ cmp(cnt1, wordSize/str2_chr_size);
        __ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
        if (str2_isL) { // LL
          __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
          __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
          __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
--- 4444,4454 ----
        __ ands(tmp2, tmp2, tmp4); // clear useless bits and check
        __ rbit(tmp2, tmp2);
        __ br(__ EQ, NOMATCH);
      __ BIND(L_SMALL_HAS_ZERO_LOOP);
        __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
!       __ cmp(cnt1, u1(wordSize/str2_chr_size));
        __ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
        if (str2_isL) { // LL
          __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
          __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
          __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
*** 4657,4685 ****
      __ sub(octetCounter, octetCounter, 2);
      __ zip1(v1, __ T16B, v1, v0);
      __ zip1(v2, __ T16B, v2, v0);
      __ st1(v1, v2, __ T16B, __ post(dst, 32));
      __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
!     __ cmp(octetCounter, large_loop_threshold);
      __ br(__ LE, LOOP_START);
      __ b(LOOP_PRFM_START);
      __ bind(LOOP_PRFM);
        __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
      __ bind(LOOP_PRFM_START);
        __ prfm(Address(src, SoftwarePrefetchHintDistance));
        __ sub(octetCounter, octetCounter, 8);
!       __ cmp(octetCounter, large_loop_threshold);
        inflate_and_store_2_fp_registers(true, v3, v4);
        inflate_and_store_2_fp_registers(true, v5, v6);
        __ br(__ GT, LOOP_PRFM);
!       __ cmp(octetCounter, 8);
        __ br(__ LT, DONE);
      __ bind(LOOP);
        __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
      __ bind(LOOP_START);
        __ sub(octetCounter, octetCounter, 8);
!       __ cmp(octetCounter, 8);
        inflate_and_store_2_fp_registers(false, v3, v4);
        inflate_and_store_2_fp_registers(false, v5, v6);
        __ br(__ GE, LOOP);
      __ bind(DONE);
        __ ret(lr);
--- 4657,4685 ----
      __ sub(octetCounter, octetCounter, 2);
      __ zip1(v1, __ T16B, v1, v0);
      __ zip1(v2, __ T16B, v2, v0);
      __ st1(v1, v2, __ T16B, __ post(dst, 32));
      __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
!     __ subs(rscratch1, octetCounter, large_loop_threshold);
      __ br(__ LE, LOOP_START);
      __ b(LOOP_PRFM_START);
      __ bind(LOOP_PRFM);
        __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
      __ bind(LOOP_PRFM_START);
        __ prfm(Address(src, SoftwarePrefetchHintDistance));
        __ sub(octetCounter, octetCounter, 8);
!       __ subs(rscratch1, octetCounter, large_loop_threshold);
        inflate_and_store_2_fp_registers(true, v3, v4);
        inflate_and_store_2_fp_registers(true, v5, v6);
        __ br(__ GT, LOOP_PRFM);
!       __ cmp(octetCounter, (u1)8);
        __ br(__ LT, DONE);
      __ bind(LOOP);
        __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
      __ bind(LOOP_START);
        __ sub(octetCounter, octetCounter, 8);
!       __ cmp(octetCounter, (u1)8);
        inflate_and_store_2_fp_registers(false, v3, v4);
        inflate_and_store_2_fp_registers(false, v5, v6);
        __ br(__ GE, LOOP);
      __ bind(DONE);
        __ ret(lr);
*** 5306,5316 ****
  #ifndef PRODUCT
      // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
      {
        ldr(Rn, Address(Pn_base, 0));
        mul(Rlo_mn, Rn, inv);
!       cmp(Rlo_mn, -1);
        Label ok;
        br(EQ, ok); {
          stop("broken inverse in Montgomery multiply");
        } bind(ok);
      }
--- 5306,5316 ----
  #ifndef PRODUCT
      // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
      {
        ldr(Rn, Address(Pn_base, 0));
        mul(Rlo_mn, Rn, inv);
!       subs(zr, Rlo_mn, -1);
        Label ok;
        br(EQ, ok); {
          stop("broken inverse in Montgomery multiply");
        } bind(ok);
      }
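
The hunks above follow one pattern: a compare immediate that provably fits in eight unsigned bits is cast to u1, and every other comparison is rewritten as a flag-setting subtract (subs) into the zero register or a scratch register. The standalone C++ sketch below illustrates that split under the assumption that the immediate form of cmp() is being narrowed to an 8-bit unsigned operand; it is an illustration only, not HotSpot code, and every name in it is hypothetical.

// Standalone sketch (hypothetical names, not HotSpot APIs): small constants
// go through an 8-bit immediate form, wide or negative values through a
// subtract-against-zero form that tolerates any 64-bit operand.
#include <cstdint>
#include <cstdio>

typedef uint8_t u1;

// Immediate compare: the u1 parameter rejects wide constants at the call
// site, mirroring the (u1) casts added in the diff.
static void cmp_imm(int64_t reg, u1 imm) {
  std::printf("cmp  reg=%lld, #%u   (flags from %lld)\n",
              (long long)reg, (unsigned)imm, (long long)(reg - imm));
}

// Subtract against a discarded destination: takes any 64-bit value, standing
// in for the subs(zr/rscratchN, reg, value) rewrites used for negative
// constants and tuning thresholds in the hunks above.
static void subs_zr(int64_t reg, int64_t value) {
  std::printf("subs zr, reg=%lld, %lld (flags from %lld)\n",
              (long long)reg, (long long)value, (long long)(reg - value));
}

int main() {
  cmp_imm(16, (u1)16);   // small non-negative constant: a (u1) cast suffices
  subs_zr(16, -8);       // negative constant: compare via subs instead
  return 0;
}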