< prev index next >
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Print this page
rev 60623 : 8248500: AArch64: Remove the r18 dependency on Windows AArch64
Reviewed-by:
Contributed-by: mbeckwit, luhenry, burban
*** 1085,1095 ****
// <= 96 bytes do inline. Direction doesn't matter because we always
// load all the data before writing anything
Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
! const Register send = r17, dend = r18;
if (PrefetchCopyIntervalInBytes > 0)
__ prfm(Address(s, 0), PLDL1KEEP);
__ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
__ br(Assembler::HI, copy_big);
--- 1085,1095 ----
// <= 96 bytes do inline. Direction doesn't matter because we always
// load all the data before writing anything
Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
! const Register send = r17, dend = r16;
if (PrefetchCopyIntervalInBytes > 0)
__ prfm(Address(s, 0), PLDL1KEEP);
__ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
__ br(Assembler::HI, copy_big);
*** 1275,1289 ****
}
// In ASSERT builds, emits code that overwrites every register from r3
// through r18, except rscratch1, with the pattern 0xdeadbeefdeadbeef.
// rscratch1 is skipped because it holds the pattern being copied.
// In non-ASSERT builds nothing is emitted.
void clobber_registers() {
#ifdef ASSERT
// Build the 64-bit pattern: load 0xdeadbeef into the low half, then OR in
// a copy of the register shifted left by 32 to fill the high half.
__ mov(rscratch1, (uint64_t)0xdeadbeef);
__ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
// Copy the pattern into each register of the fixed range r3..r18.
! for (Register r = r3; r <= r18; r++)
! if (r != rscratch1) __ mov(r, rscratch1);
#endif
}
// Scan over array at a for count oops, verifying each one.
// Preserves a and count, clobbers rscratch1 and rscratch2.
void verify_oop_array (size_t size, Register a, Register count, Register temp) {
--- 1275,1293 ----
}
// In ASSERT builds, emits code that overwrites every register in
// MacroAssembler::call_clobbered_registers(), except rscratch1, with the
// pattern 0xdeadbeefdeadbeef. rscratch1 is removed from the set because it
// holds the pattern being copied. In non-ASSERT builds nothing is emitted.
void clobber_registers() {
#ifdef ASSERT
// The set of registers to overwrite is taken from the MacroAssembler
// rather than from a hard-coded register range.
+ RegSet clobbered
+ = MacroAssembler::call_clobbered_registers() - rscratch1;
// Build the 64-bit pattern: load 0xdeadbeef into the low half, then OR in
// a copy of the register shifted left by 32 to fill the high half.
__ mov(rscratch1, (uint64_t)0xdeadbeef);
__ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
// Walk the set; the loop stops when the iterator yields noreg.
! for (RegSetIterator it = clobbered.begin(); *it != noreg; ++it) {
! __ mov(*it, rscratch1);
! }
#endif
+
}
// Scan over array at a for count oops, verifying each one.
// Preserves a and count, clobbers rscratch1 and rscratch2.
void verify_oop_array (size_t size, Register a, Register count, Register temp) {
*** 1712,1725 ****
const Register ckval = c_rarg4; // super_klass
RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
RegSet wb_post_saved_regs = RegSet::of(count);
! // Registers used as temps (r18, r19, r20 are save-on-entry)
const Register count_save = r21; // orig elementscount
const Register start_to = r20; // destination array start address
- const Register copied_oop = r18; // actual oop copied
const Register r19_klass = r19; // oop._klass
//---------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the two arrays are subtypes of Object[] but the
--- 1716,1729 ----
const Register ckval = c_rarg4; // super_klass
RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
RegSet wb_post_saved_regs = RegSet::of(count);
! // Registers used as temps (r19, r20, r21, r22 are save-on-entry)
! const Register copied_oop = r22; // actual oop copied
const Register count_save = r21; // orig elementscount
const Register start_to = r20; // destination array start address
const Register r19_klass = r19; // oop._klass
//---------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the two arrays are subtypes of Object[] but the
*** 1752,1763 ****
BLOCK_COMMENT("Entry:");
}
// Empty array: Nothing to do.
__ cbz(count, L_done);
!
! __ push(RegSet::of(r18, r19, r20, r21), sp);
#ifdef ASSERT
BLOCK_COMMENT("assert consistent ckoff/ckval");
// The ckoff and ckval must be mutually consistent,
// even though caller generates both.
--- 1756,1766 ----
BLOCK_COMMENT("Entry:");
}
// Empty array: Nothing to do.
__ cbz(count, L_done);
! __ push(RegSet::of(r19, r20, r21, r22), sp);
#ifdef ASSERT
BLOCK_COMMENT("assert consistent ckoff/ckval");
// The ckoff and ckval must be mutually consistent,
// even though caller generates both.
*** 1822,1832 ****
__ BIND(L_do_card_marks);
bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
__ bind(L_done_pop);
! __ pop(RegSet::of(r18, r19, r20, r21), sp);
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
__ bind(L_done);
__ mov(r0, count);
__ leave();
--- 1825,1835 ----
__ BIND(L_do_card_marks);
bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
__ bind(L_done_pop);
! __ pop(RegSet::of(r19, r20, r21, r22), sp);
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
__ bind(L_done);
__ mov(r0, count);
__ leave();
*** 1999,2009 ****
__ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set
// registers used as temp
const Register scratch_length = r16; // elements count to copy
const Register scratch_src_klass = r17; // array klass
! const Register lh = r18; // layout helper
// if (length < 0) return -1;
__ movw(scratch_length, length); // length (elements count, 32-bits value)
__ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set
--- 2002,2012 ----
__ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set
// registers used as temp
const Register scratch_length = r16; // elements count to copy
const Register scratch_src_klass = r17; // array klass
! const Register lh = r15; // layout helper
// if (length < 0) return -1;
__ movw(scratch_length, length); // length (elements count, 32-bits value)
__ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set
*** 2070,2080 ****
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
const Register rscratch1_offset = rscratch1; // array offset
! const Register r18_elsize = lh; // element size
__ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
exact_log2(Klass::_lh_header_size_mask+1)); // array_offset
__ add(src, src, rscratch1_offset); // src array offset
__ add(dst, dst, rscratch1_offset); // dst array offset
--- 2073,2083 ----
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
const Register rscratch1_offset = rscratch1; // array offset
! const Register r15_elsize = lh; // element size
__ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
exact_log2(Klass::_lh_header_size_mask+1)); // array_offset
__ add(src, src, rscratch1_offset); // src array offset
__ add(dst, dst, rscratch1_offset); // dst array offset
*** 2091,2102 ****
assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
// The possible values of elsize are 0-3, i.e. exact_log2(element
// size in bytes). We do a simple bitwise binary search.
__ BIND(L_copy_bytes);
! __ tbnz(r18_elsize, 1, L_copy_ints);
! __ tbnz(r18_elsize, 0, L_copy_shorts);
__ lea(from, Address(src, src_pos));// src_addr
__ lea(to, Address(dst, dst_pos));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(byte_copy_entry));
--- 2094,2105 ----
assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
// The possible values of elsize are 0-3, i.e. exact_log2(element
// size in bytes). We do a simple bitwise binary search.
__ BIND(L_copy_bytes);
! __ tbnz(r15_elsize, 1, L_copy_ints);
! __ tbnz(r15_elsize, 0, L_copy_shorts);
__ lea(from, Address(src, src_pos));// src_addr
__ lea(to, Address(dst, dst_pos));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(byte_copy_entry));
*** 2105,2127 ****
__ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(short_copy_entry));
__ BIND(L_copy_ints);
! __ tbnz(r18_elsize, 0, L_copy_longs);
__ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
__ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(int_copy_entry));
__ BIND(L_copy_longs);
#ifdef ASSERT
{
BLOCK_COMMENT("assert long copy {");
Label L;
! __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
! __ cmpw(r18_elsize, LogBytesPerLong);
__ br(Assembler::EQ, L);
__ stop("must be long copy, but elsize is wrong");
__ bind(L);
BLOCK_COMMENT("} assert long copy done");
}
--- 2108,2130 ----
__ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(short_copy_entry));
__ BIND(L_copy_ints);
! __ tbnz(r15_elsize, 0, L_copy_longs);
__ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
__ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(int_copy_entry));
__ BIND(L_copy_longs);
#ifdef ASSERT
{
BLOCK_COMMENT("assert long copy {");
Label L;
! __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r15_elsize
! __ cmpw(r15_elsize, LogBytesPerLong);
__ br(Assembler::EQ, L);
__ stop("must be long copy, but elsize is wrong");
__ bind(L);
BLOCK_COMMENT("} assert long copy done");
}
*** 2135,2146 ****
__ BIND(L_objArray);
// live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
Label L_plain_copy, L_checkcast_copy;
// test array classes for subtyping
! __ load_klass(r18, dst);
! __ cmp(scratch_src_klass, r18); // usual case is exact equality
__ br(Assembler::NE, L_checkcast_copy);
// Identically typed arrays can be copied without element-wise checks.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
rscratch2, L_failed);
--- 2138,2149 ----
__ BIND(L_objArray);
// live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
Label L_plain_copy, L_checkcast_copy;
// test array classes for subtyping
! __ load_klass(r15, dst);
! __ cmp(scratch_src_klass, r15); // usual case is exact equality
__ br(Assembler::NE, L_checkcast_copy);
// Identically typed arrays can be copied without element-wise checks.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
rscratch2, L_failed);
*** 2152,2172 ****
__ movw(count, scratch_length); // length
__ BIND(L_plain_copy);
__ b(RuntimeAddress(oop_copy_entry));
__ BIND(L_checkcast_copy);
! // live at this point: scratch_src_klass, scratch_length, r18 (dst_klass)
{
// Before looking at dst.length, make sure dst is also an objArray.
! __ ldrw(rscratch1, Address(r18, lh_offset));
__ movw(rscratch2, objArray_lh);
__ eorw(rscratch1, rscratch1, rscratch2);
__ cbnzw(rscratch1, L_failed);
// It is safe to examine both src.length and dst.length.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
! r18, L_failed);
__ load_klass(dst_klass, dst); // reload
// Marshal the base address arguments now, freeing registers.
__ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
--- 2155,2175 ----
__ movw(count, scratch_length); // length
__ BIND(L_plain_copy);
__ b(RuntimeAddress(oop_copy_entry));
__ BIND(L_checkcast_copy);
! // live at this point: scratch_src_klass, scratch_length, r15 (dst_klass)
{
// Before looking at dst.length, make sure dst is also an objArray.
! __ ldrw(rscratch1, Address(r15, lh_offset));
__ movw(rscratch2, objArray_lh);
__ eorw(rscratch1, rscratch1, rscratch2);
__ cbnzw(rscratch1, L_failed);
// It is safe to examine both src.length and dst.length.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
! r15, L_failed);
__ load_klass(dst_klass, dst); // reload
// Marshal the base address arguments now, freeing registers.
__ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
*** 5053,5098 ****
// Constructs the generator on top of the given assembler's code buffer and
// assigns a machine register to every named operand.
//
// Registers are handed out consecutively, starting at c_rarg0, by
// pre-incrementing `reg` for each new operand. When `squaring` is true,
// Pb_base aliases Pa_base and no extra register is consumed, so every
// subsequent operand lands one register lower than in the multiply case.
MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
: MacroAssembler(as->code()), _squaring(squaring) {
// Register allocation
! Register reg = c_rarg0;
! Pa_base = reg; // Argument registers
if (squaring)
Pb_base = Pa_base;
else
! Pb_base = ++reg;
! Pn_base = ++reg;
! Rlen= ++reg;
! inv = ++reg;
! Pm_base = ++reg;
// Working registers:
! Ra = ++reg; // The current digit of a, b, n, and m.
! Rb = ++reg;
! Rm = ++reg;
! Rn = ++reg;
!
! Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m.
! Pb = ++reg;
! Pm = ++reg;
! Pn = ++reg;
!
! t0 = ++reg; // Three registers which form a
! t1 = ++reg; // triple-precision accumulator.
! t2 = ++reg;
!
! Ri = ++reg; // Inner and outer loop indexes.
! Rj = ++reg;
!
! Rhi_ab = ++reg; // Product registers: low and high parts
! Rlo_ab = ++reg; // of a*b and m*n.
! Rhi_mn = ++reg;
! Rlo_mn = ++reg;
// r19 and up are callee-saved.
// The save set is every register from r19 up to the last one handed out
// above, plus Pm_base.
! _toSave = RegSet::range(r19, reg) + Pm_base;
}
private:
void save_regs() {
push(_toSave, sp);
--- 5056,5101 ----
// Constructs the generator on top of the given assembler's code buffer and
// assigns a machine register to every named operand.
//
// Registers are handed out in iteration order from the set r0..r26 with
// r18_tls removed, so r18_tls is never assigned to an operand. When
// `squaring` is true, Pb_base aliases Pa_base and the iterator is not
// advanced, so every subsequent operand takes the register one position
// earlier in the set than in the multiply case.
MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
: MacroAssembler(as->code()), _squaring(squaring) {
// Register allocation
! RegSetIterator regs = (RegSet::range(r0, r26) - r18_tls).begin();
! Pa_base = *regs; // Argument registers
if (squaring)
Pb_base = Pa_base;
else
! Pb_base = *++regs;
! Pn_base = *++regs;
! Rlen= *++regs;
! inv = *++regs;
! Pm_base = *++regs;
// Working registers:
! Ra = *++regs; // The current digit of a, b, n, and m.
! Rb = *++regs;
! Rm = *++regs;
! Rn = *++regs;
!
! Pa = *++regs; // Pointers to the current/next digit of a, b, n, and m.
! Pb = *++regs;
! Pm = *++regs;
! Pn = *++regs;
!
! t0 = *++regs; // Three registers which form a
! t1 = *++regs; // triple-precision accumulator.
! t2 = *++regs;
!
! Ri = *++regs; // Inner and outer loop indexes.
! Rj = *++regs;
!
! Rhi_ab = *++regs; // Product registers: low and high parts
! Rlo_ab = *++regs; // of a*b and m*n.
! Rhi_mn = *++regs;
! Rlo_mn = *++regs;
// r19 and up are callee-saved.
// The save set is every register from r19 up to the last one handed out
// above (*regs), plus Pm_base.
! _toSave = RegSet::range(r19, *regs) + Pm_base;
}
private:
void save_regs() {
push(_toSave, sp);
< prev index next >