< prev index next >
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Print this page
*** 263,273 ****
#ifdef ASSERT
// make sure we have no pending exceptions
{
Label L;
__ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
! __ cmp(rscratch1, (unsigned)NULL_WORD);
__ br(Assembler::EQ, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ BIND(L);
}
#endif
--- 263,273 ----
#ifdef ASSERT
// make sure we have no pending exceptions
{
Label L;
__ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
! __ cmp(rscratch1, (u1)NULL_WORD);
__ br(Assembler::EQ, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ BIND(L);
}
#endif
*** 320,336 ****
// n.b. this assumes Java returns an integral result in r0
// and a floating result in j_farg0
__ ldr(j_rarg2, result);
Label is_long, is_float, is_double, exit;
__ ldr(j_rarg1, result_type);
! __ cmp(j_rarg1, T_OBJECT);
__ br(Assembler::EQ, is_long);
! __ cmp(j_rarg1, T_LONG);
__ br(Assembler::EQ, is_long);
! __ cmp(j_rarg1, T_FLOAT);
__ br(Assembler::EQ, is_float);
! __ cmp(j_rarg1, T_DOUBLE);
__ br(Assembler::EQ, is_double);
// handle T_INT case
__ strw(r0, Address(j_rarg2));
--- 320,336 ----
// n.b. this assumes Java returns an integral result in r0
// and a floating result in j_farg0
__ ldr(j_rarg2, result);
Label is_long, is_float, is_double, exit;
__ ldr(j_rarg1, result_type);
! __ cmp(j_rarg1, (u1)T_OBJECT);
__ br(Assembler::EQ, is_long);
! __ cmp(j_rarg1, (u1)T_LONG);
__ br(Assembler::EQ, is_long);
! __ cmp(j_rarg1, (u1)T_FLOAT);
__ br(Assembler::EQ, is_float);
! __ cmp(j_rarg1, (u1)T_DOUBLE);
__ br(Assembler::EQ, is_double);
// handle T_INT case
__ strw(r0, Address(j_rarg2));
*** 741,751 ****
#ifdef ASSERT
// Make sure we are never given < 8 words
{
Label L;
! __ cmp(count, 8);
__ br(Assembler::GE, L);
__ stop("genrate_copy_longs called with < 8 words");
__ bind(L);
}
#endif
--- 741,751 ----
#ifdef ASSERT
// Make sure we are never given < 8 words
{
Label L;
! __ cmp(count, (u1)8);
__ br(Assembler::GE, L);
__ stop("genrate_copy_longs called with < 8 words");
__ bind(L);
}
#endif
*** 1101,1123 ****
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
const Register send = r17, dend = r18;
if (PrefetchCopyIntervalInBytes > 0)
__ prfm(Address(s, 0), PLDL1KEEP);
! __ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
__ br(Assembler::HI, copy_big);
__ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
__ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
! __ cmp(count, 16/granularity);
__ br(Assembler::LS, copy16);
! __ cmp(count, 64/granularity);
__ br(Assembler::HI, copy80);
! __ cmp(count, 32/granularity);
__ br(Assembler::LS, copy32);
// 33..64 bytes
if (UseSIMDForMemoryOps) {
__ ldpq(v0, v1, Address(s, 0));
--- 1101,1123 ----
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
const Register send = r17, dend = r18;
if (PrefetchCopyIntervalInBytes > 0)
__ prfm(Address(s, 0), PLDL1KEEP);
! __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
__ br(Assembler::HI, copy_big);
__ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
__ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
! __ cmp(count, u1(16/granularity));
__ br(Assembler::LS, copy16);
! __ cmp(count, u1(64/granularity));
__ br(Assembler::HI, copy80);
! __ cmp(count, u1(32/granularity));
__ br(Assembler::LS, copy32);
// 33..64 bytes
if (UseSIMDForMemoryOps) {
__ ldpq(v0, v1, Address(s, 0));
*** 1168,1178 ****
}
__ b(finish);
// 0..16 bytes
__ bind(copy16);
! __ cmp(count, 8/granularity);
__ br(Assembler::LO, copy8);
// 8..16 bytes
__ ldr(t0, Address(s, 0));
__ ldr(t1, Address(send, -8));
--- 1168,1178 ----
}
__ b(finish);
// 0..16 bytes
__ bind(copy16);
! __ cmp(count, u1(8/granularity));
__ br(Assembler::LO, copy8);
// 8..16 bytes
__ ldr(t0, Address(s, 0));
__ ldr(t1, Address(send, -8));
*** 3268,3278 ****
__ ubfx(s2, adler, 16, 16); // s2 = ((adler >> 16) & 0xffff)
__ uxth(s1, adler); // s1 = (adler & 0xffff)
// The pipelined loop needs at least 16 elements for 1 iteration
// It does check this, but it is more effective to skip to the cleanup loop
! __ cmp(len, 16);
__ br(Assembler::HS, L_nmax);
__ cbz(len, L_combine);
__ bind(L_simple_by1_loop);
__ ldrb(temp0, Address(__ post(buff, 1)));
--- 3268,3278 ----
__ ubfx(s2, adler, 16, 16); // s2 = ((adler >> 16) & 0xffff)
__ uxth(s1, adler); // s1 = (adler & 0xffff)
// The pipelined loop needs at least 16 elements for 1 iteration
// It does check this, but it is more effective to skip to the cleanup loop
! __ cmp(len, (u1)16);
__ br(Assembler::HS, L_nmax);
__ cbz(len, L_combine);
__ bind(L_simple_by1_loop);
__ ldrb(temp0, Address(__ post(buff, 1)));
*** 3652,3662 ****
__ eor(result, __ T16B, lo, t0);
}
address generate_has_negatives(address &has_negatives_long) {
StubCodeMark mark(this, "StubRoutines", "has_negatives");
! const int large_loop_size = 64;
const uint64_t UPPER_BIT_MASK=0x8080808080808080;
int dcache_line = VM_Version::dcache_line_size();
Register ary1 = r1, len = r2, result = r0;
--- 3652,3662 ----
__ eor(result, __ T16B, lo, t0);
}
address generate_has_negatives(address &has_negatives_long) {
StubCodeMark mark(this, "StubRoutines", "has_negatives");
! const u1 large_loop_size = 64;
const uint64_t UPPER_BIT_MASK=0x8080808080808080;
int dcache_line = VM_Version::dcache_line_size();
Register ary1 = r1, len = r2, result = r0;
*** 3666,3676 ****
__ enter();
Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;
! __ cmp(len, 15);
__ br(Assembler::GT, LEN_OVER_15);
// The only case when execution falls into this code is when pointer is near
// the end of memory page and we have to avoid reading next page
__ add(ary1, ary1, len);
__ subs(len, len, 8);
--- 3666,3676 ----
__ enter();
Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;
! __ cmp(len, (u1)15);
__ br(Assembler::GT, LEN_OVER_15);
// The only case when execution falls into this code is when pointer is near
// the end of memory page and we have to avoid reading next page
__ add(ary1, ary1, len);
__ subs(len, len, 8);
*** 3762,3785 ****
__ br(Assembler::NE, RET_TRUE);
__ cmp(len, large_loop_size);
__ br(Assembler::GE, LARGE_LOOP);
__ bind(CHECK_16); // small 16-byte load pre-loop
! __ cmp(len, 16);
__ br(Assembler::LT, POST_LOOP16);
__ bind(LOOP16); // small 16-byte load loop
__ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
__ sub(len, len, 16);
__ orr(tmp2, tmp2, tmp3);
__ tst(tmp2, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
! __ cmp(len, 16);
__ br(Assembler::GE, LOOP16); // 16-byte load loop end
__ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
! __ cmp(len, 8);
__ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
__ ldr(tmp3, Address(__ post(ary1, 8)));
__ sub(len, len, 8);
__ tst(tmp3, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
--- 3762,3785 ----
__ br(Assembler::NE, RET_TRUE);
__ cmp(len, large_loop_size);
__ br(Assembler::GE, LARGE_LOOP);
__ bind(CHECK_16); // small 16-byte load pre-loop
! __ cmp(len, (u1)16);
__ br(Assembler::LT, POST_LOOP16);
__ bind(LOOP16); // small 16-byte load loop
__ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
__ sub(len, len, 16);
__ orr(tmp2, tmp2, tmp3);
__ tst(tmp2, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
! __ cmp(len, (u1)16);
__ br(Assembler::GE, LOOP16); // 16-byte load loop end
__ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
! __ cmp(len, (u1)8);
__ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
__ ldr(tmp3, Address(__ post(ary1, 8)));
__ sub(len, len, 8);
__ tst(tmp3, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
*** 3940,3950 ****
if (SoftwarePrefetchHintDistance >= 0) {
__ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
! __ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
}
__ bind(NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
--- 3940,3950 ----
if (SoftwarePrefetchHintDistance >= 0) {
__ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
! __ subs(zr, cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
}
__ bind(NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
*** 3953,3963 ****
if (SoftwarePrefetchHintDistance >= 0) {
__ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
! __ cmp(cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
}
__ bind(NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
--- 3953,3963 ----
if (SoftwarePrefetchHintDistance >= 0) {
__ subs(tmp1, cnt1, prefetchLoopThreshold);
__ br(__ LE, NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
/* prfm = */ true, NOT_EQUAL);
! __ subs(zr, cnt1, nonPrefetchLoopThreshold);
__ br(__ LT, TAIL);
}
__ bind(NO_PREFETCH_LARGE_LOOP);
generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
/* prfm = */ false, NOT_EQUAL);
*** 4104,4114 ****
__ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
__ ldr(tmp3, Address(__ post(cnt1, 8)));
if (SoftwarePrefetchHintDistance >= 0) {
! __ cmp(cnt2, prefetchLoopExitCondition);
__ br(__ LT, SMALL_LOOP);
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
__ mov(tmp4, 2);
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
--- 4104,4114 ----
__ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
__ ldr(tmp3, Address(__ post(cnt1, 8)));
if (SoftwarePrefetchHintDistance >= 0) {
! __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
__ br(__ LT, SMALL_LOOP);
__ bind(LARGE_LOOP_PREFETCH);
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
__ mov(tmp4, 2);
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
*** 4121,4131 ****
__ bind(LARGE_LOOP_PREFETCH_REPEAT2);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ subs(tmp4, tmp4, 1);
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
__ sub(cnt2, cnt2, 64);
! __ cmp(cnt2, prefetchLoopExitCondition);
__ br(__ GE, LARGE_LOOP_PREFETCH);
}
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
--- 4121,4131 ----
__ bind(LARGE_LOOP_PREFETCH_REPEAT2);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ subs(tmp4, tmp4, 1);
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
__ sub(cnt2, cnt2, 64);
! __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
__ br(__ GE, LARGE_LOOP_PREFETCH);
}
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
__ subs(cnt2, cnt2, 16);
__ br(__ LT, TAIL);
*** 4135,4145 ****
__ bind(SMALL_LOOP_ENTER);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ br(__ GE, SMALL_LOOP);
__ cbz(cnt2, LOAD_LAST);
__ bind(TAIL); // 1..15 characters left
! __ cmp(cnt2, -8);
__ br(__ GT, TAIL_LOAD_16);
__ ldrd(vtmp, Address(tmp2));
__ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
__ ldr(tmpU, Address(__ post(cnt1, 8)));
--- 4135,4145 ----
__ bind(SMALL_LOOP_ENTER);
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
__ br(__ GE, SMALL_LOOP);
__ cbz(cnt2, LOAD_LAST);
__ bind(TAIL); // 1..15 characters left
! __ subs(zr, cnt2, -8);
__ br(__ GT, TAIL_LOAD_16);
__ ldrd(vtmp, Address(tmp2));
__ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
__ ldr(tmpU, Address(__ post(cnt1, 8)));
*** 4238,4248 ****
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
! __ cmp(cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
--- 4238,4248 ----
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
compare_string_16_bytes_same(DIFF, DIFF2);
compare_string_16_bytes_same(DIFF, DIFF2);
__ sub(cnt2, cnt2, isLL ? 64 : 32);
compare_string_16_bytes_same(DIFF, DIFF2);
! __ subs(rscratch2, cnt2, largeLoopExitCondition);
compare_string_16_bytes_same(DIFF, DIFF2);
__ br(__ GT, LARGE_LOOP_PREFETCH);
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
// less than 16 bytes left?
__ subs(cnt2, cnt2, isLL ? 16 : 8);
*** 4414,4424 ****
__ subs(cnt2, cnt2, wordSize/str2_chr_size);
__ add(str2, str2, wordSize);
__ add(result, result, wordSize/str2_chr_size);
__ br(__ GE, L_LOOP);
__ BIND(L_POST_LOOP);
! __ cmp(cnt2, -wordSize/str2_chr_size); // no extra characters to check
__ br(__ LE, NOMATCH);
__ ldr(ch2, Address(str2));
__ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
__ eor(ch2, first, ch2);
__ sub(tmp2, ch2, tmp1);
--- 4414,4424 ----
__ subs(cnt2, cnt2, wordSize/str2_chr_size);
__ add(str2, str2, wordSize);
__ add(result, result, wordSize/str2_chr_size);
__ br(__ GE, L_LOOP);
__ BIND(L_POST_LOOP);
! __ subs(zr, cnt2, -wordSize/str2_chr_size); // no extra characters to check
__ br(__ LE, NOMATCH);
__ ldr(ch2, Address(str2));
__ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
__ eor(ch2, first, ch2);
__ sub(tmp2, ch2, tmp1);
*** 4444,4454 ****
__ ands(tmp2, tmp2, tmp4); // clear useless bits and check
__ rbit(tmp2, tmp2);
__ br(__ EQ, NOMATCH);
__ BIND(L_SMALL_HAS_ZERO_LOOP);
__ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
! __ cmp(cnt1, wordSize/str2_chr_size);
__ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
if (str2_isL) { // LL
__ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
__ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
__ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
--- 4444,4454 ----
__ ands(tmp2, tmp2, tmp4); // clear useless bits and check
__ rbit(tmp2, tmp2);
__ br(__ EQ, NOMATCH);
__ BIND(L_SMALL_HAS_ZERO_LOOP);
__ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
! __ cmp(cnt1, u1(wordSize/str2_chr_size));
__ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
if (str2_isL) { // LL
__ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
__ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
__ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
*** 4657,4685 ****
__ sub(octetCounter, octetCounter, 2);
__ zip1(v1, __ T16B, v1, v0);
__ zip1(v2, __ T16B, v2, v0);
__ st1(v1, v2, __ T16B, __ post(dst, 32));
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
! __ cmp(octetCounter, large_loop_threshold);
__ br(__ LE, LOOP_START);
__ b(LOOP_PRFM_START);
__ bind(LOOP_PRFM);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_PRFM_START);
__ prfm(Address(src, SoftwarePrefetchHintDistance));
__ sub(octetCounter, octetCounter, 8);
! __ cmp(octetCounter, large_loop_threshold);
inflate_and_store_2_fp_registers(true, v3, v4);
inflate_and_store_2_fp_registers(true, v5, v6);
__ br(__ GT, LOOP_PRFM);
! __ cmp(octetCounter, 8);
__ br(__ LT, DONE);
__ bind(LOOP);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_START);
__ sub(octetCounter, octetCounter, 8);
! __ cmp(octetCounter, 8);
inflate_and_store_2_fp_registers(false, v3, v4);
inflate_and_store_2_fp_registers(false, v5, v6);
__ br(__ GE, LOOP);
__ bind(DONE);
__ ret(lr);
--- 4657,4685 ----
__ sub(octetCounter, octetCounter, 2);
__ zip1(v1, __ T16B, v1, v0);
__ zip1(v2, __ T16B, v2, v0);
__ st1(v1, v2, __ T16B, __ post(dst, 32));
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
! __ subs(rscratch1, octetCounter, large_loop_threshold);
__ br(__ LE, LOOP_START);
__ b(LOOP_PRFM_START);
__ bind(LOOP_PRFM);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_PRFM_START);
__ prfm(Address(src, SoftwarePrefetchHintDistance));
__ sub(octetCounter, octetCounter, 8);
! __ subs(rscratch1, octetCounter, large_loop_threshold);
inflate_and_store_2_fp_registers(true, v3, v4);
inflate_and_store_2_fp_registers(true, v5, v6);
__ br(__ GT, LOOP_PRFM);
! __ cmp(octetCounter, (u1)8);
__ br(__ LT, DONE);
__ bind(LOOP);
__ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
__ bind(LOOP_START);
__ sub(octetCounter, octetCounter, 8);
! __ cmp(octetCounter, (u1)8);
inflate_and_store_2_fp_registers(false, v3, v4);
inflate_and_store_2_fp_registers(false, v5, v6);
__ br(__ GE, LOOP);
__ bind(DONE);
__ ret(lr);
*** 5306,5316 ****
#ifndef PRODUCT
// assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
{
ldr(Rn, Address(Pn_base, 0));
mul(Rlo_mn, Rn, inv);
! cmp(Rlo_mn, -1);
Label ok;
br(EQ, ok); {
stop("broken inverse in Montgomery multiply");
} bind(ok);
}
--- 5306,5316 ----
#ifndef PRODUCT
// assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
{
ldr(Rn, Address(Pn_base, 0));
mul(Rlo_mn, Rn, inv);
! subs(zr, Rlo_mn, -1);
Label ok;
br(EQ, ok); {
stop("broken inverse in Montgomery multiply");
} bind(ok);
}
< prev index next >