1070
1071 // All-singing all-dancing memory copy.
1072 //
1073 // Copy count units of memory from s to d. The size of a unit is
1074 // step, which can be positive or negative depending on the direction
1075 // of copy. If is_aligned is false, we align the source address.
1076 //
1077
1078 void copy_memory(bool is_aligned, Register s, Register d,
1079 Register count, Register tmp, int step) {
1080 copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
1081 bool is_backwards = step < 0;
1082 int granularity = uabs(step);
1083 const Register t0 = r3, t1 = r4;
1084
1085 // <= 96 bytes do inline. Direction doesn't matter because we always
1086 // load all the data before writing anything
1087 Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
1088 const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
1089 const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
1090 const Register send = r17, dend = r18;
1091
1092 if (PrefetchCopyIntervalInBytes > 0)
1093 __ prfm(Address(s, 0), PLDL1KEEP);
1094 __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
1095 __ br(Assembler::HI, copy_big);
1096
1097 __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
1098 __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
1099
1100 __ cmp(count, u1(16/granularity));
1101 __ br(Assembler::LS, copy16);
1102
1103 __ cmp(count, u1(64/granularity));
1104 __ br(Assembler::HI, copy80);
1105
1106 __ cmp(count, u1(32/granularity));
1107 __ br(Assembler::LS, copy32);
1108
1109 // 33..64 bytes
1110 if (UseSIMDForMemoryOps) {
1260
1261 // We have a count of units and some trailing bytes. Adjust the
1262 // count and do a bulk copy of words.
1263 __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
1264 if (direction == copy_forwards)
1265 __ bl(copy_f);
1266 else
1267 __ bl(copy_b);
1268
1269 // And the tail.
1270 copy_memory_small(s, d, count, tmp, step);
1271
1272 if (granularity >= 8) __ bind(copy8);
1273 if (granularity >= 4) __ bind(copy4);
1274 __ bind(finish);
1275 }
1276
1277
// Debug-only helper: poison every temp register with a recognizable
// pattern so generated stubs cannot silently rely on stale contents.
void clobber_registers() {
#ifdef ASSERT
    // Build the 64-bit poison value 0xdeadbeefdeadbeef in rscratch1 by
    // duplicating the 32-bit pattern into the high half.
    __ mov(rscratch1, (uint64_t)0xdeadbeef);
    __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
    // Fill r3..r18, skipping rscratch1 itself (it holds the pattern).
    // NOTE(review): this range touches r18, which is the reserved
    // platform register under the Windows/Darwin AAPCS64 variants —
    // confirm r18 is free to clobber on all supported ABIs.
    for (Register r = r3; r <= r18; r++)
      if (r != rscratch1) __ mov(r, rscratch1);
#endif
}
1286
1287 // Scan over array at a for count oops, verifying each one.
1288 // Preserves a and count, clobbers rscratch1 and rscratch2.
1289 void verify_oop_array (size_t size, Register a, Register count, Register temp) {
1290 Label loop, end;
1291 __ mov(rscratch1, a);
1292 __ mov(rscratch2, zr);
1293 __ bind(loop);
1294 __ cmp(rscratch2, count);
1295 __ br(Assembler::HS, end);
1296 if (size == (size_t)wordSize) {
1297 __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1298 __ verify_oop(temp);
1299 } else {
1300 __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1301 __ decode_heap_oop(temp); // calls verify_oop
1302 }
1303 __ add(rscratch2, rscratch2, size);
1304 __ b(loop);
1697 //
1698 // Output:
1699 // r0 == 0 - success
1700 // r0 == -1^K - failure, where K is partial transfer count
1701 //
1702 address generate_checkcast_copy(const char *name, address *entry,
1703 bool dest_uninitialized = false) {
1704
1705 Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
1706
1707 // Input registers (after setup_arg_regs)
1708 const Register from = c_rarg0; // source array address
1709 const Register to = c_rarg1; // destination array address
1710 const Register count = c_rarg2; // elementscount
1711 const Register ckoff = c_rarg3; // super_check_offset
1712 const Register ckval = c_rarg4; // super_klass
1713
1714 RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
1715 RegSet wb_post_saved_regs = RegSet::of(count);
1716
1717 // Registers used as temps (r18, r19, r20 are save-on-entry)
1718 const Register count_save = r21; // orig elementscount
1719 const Register start_to = r20; // destination array start address
1720 const Register copied_oop = r18; // actual oop copied
1721 const Register r19_klass = r19; // oop._klass
1722
1723 //---------------------------------------------------------------
1724 // Assembler stub will be used for this call to arraycopy
1725 // if the two arrays are subtypes of Object[] but the
1726 // destination array type is not equal to or a supertype
1727 // of the source type. Each element must be separately
1728 // checked.
1729
1730 assert_different_registers(from, to, count, ckoff, ckval, start_to,
1731 copied_oop, r19_klass, count_save);
1732
1733 __ align(CodeEntryAlignment);
1734 StubCodeMark mark(this, "StubRoutines", name);
1735 address start = __ pc();
1736
1737 __ enter(); // required for proper stackwalking of RuntimeStub frame
1738
1739 #ifdef ASSERT
1740 // caller guarantees that the arrays really are different
1741 // otherwise, we would have to make conjoint checks
1742 { Label L;
1743 array_overlap_test(L, TIMES_OOP);
1744 __ stop("checkcast_copy within a single array");
1745 __ bind(L);
1746 }
1747 #endif //ASSERT
1748
1749 // Caller of this entry point must set up the argument registers.
1750 if (entry != NULL) {
1751 *entry = __ pc();
1752 BLOCK_COMMENT("Entry:");
1753 }
1754
1755 // Empty array: Nothing to do.
1756 __ cbz(count, L_done);
1757
1758 __ push(RegSet::of(r18, r19, r20, r21), sp);
1759
1760 #ifdef ASSERT
1761 BLOCK_COMMENT("assert consistent ckoff/ckval");
1762 // The ckoff and ckval must be mutually consistent,
1763 // even though caller generates both.
1764 { Label L;
1765 int sco_offset = in_bytes(Klass::super_check_offset_offset());
1766 __ ldrw(start_to, Address(ckval, sco_offset));
1767 __ cmpw(ckoff, start_to);
1768 __ br(Assembler::EQ, L);
1769 __ stop("super_check_offset inconsistent");
1770 __ bind(L);
1771 }
1772 #endif //ASSERT
1773
1774 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
1775 bool is_oop = true;
1776 if (dest_uninitialized) {
1777 decorators |= IS_DEST_UNINITIALIZED;
1778 }
1807 __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1808 __ cbz(copied_oop, L_store_element);
1809
1810 __ load_klass(r19_klass, copied_oop);// query the object klass
1811 generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1812 // ======== end loop ========
1813
1814 // It was a real error; we must depend on the caller to finish the job.
1815 // Register count = remaining oops, count_orig = total oops.
1816 // Emit GC store barriers for the oops we have copied and report
1817 // their number to the caller.
1818
1819 __ subs(count, count_save, count); // K = partially copied oop count
1820 __ eon(count, count, zr); // report (-1^K) to caller
1821 __ br(Assembler::EQ, L_done_pop);
1822
1823 __ BIND(L_do_card_marks);
1824 bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
1825
1826 __ bind(L_done_pop);
1827 __ pop(RegSet::of(r18, r19, r20, r21), sp);
1828 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1829
1830 __ bind(L_done);
1831 __ mov(r0, count);
1832 __ leave();
1833 __ ret(lr);
1834
1835 return start;
1836 }
1837
1838 // Perform range checks on the proposed arraycopy.
1839 // Kills temp, but nothing else.
1840 // Also, clean the sign bits of src_pos and dst_pos.
1841 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
1842 Register src_pos, // source position (c_rarg1)
1843 Register dst, // destination array oo (c_rarg2)
1844 Register dst_pos, // destination position (c_rarg3)
1845 Register length,
1846 Register temp,
1847 Label& L_failed) {
1984 // (6) src and dst should be arrays.
1985 // (7) src_pos + length must not exceed length of src.
1986 // (8) dst_pos + length must not exceed length of dst.
1987 //
1988
1989 // if (src == NULL) return -1;
1990 __ cbz(src, L_failed);
1991
1992 // if (src_pos < 0) return -1;
1993 __ tbnz(src_pos, 31, L_failed); // i.e. sign bit set
1994
1995 // if (dst == NULL) return -1;
1996 __ cbz(dst, L_failed);
1997
1998 // if (dst_pos < 0) return -1;
1999 __ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set
2000
2001 // registers used as temp
2002 const Register scratch_length = r16; // elements count to copy
2003 const Register scratch_src_klass = r17; // array klass
2004 const Register lh = r18; // layout helper
2005
2006 // if (length < 0) return -1;
2007 __ movw(scratch_length, length); // length (elements count, 32-bits value)
2008 __ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set
2009
2010 __ load_klass(scratch_src_klass, src);
2011 #ifdef ASSERT
2012 // assert(src->klass() != NULL);
2013 {
2014 BLOCK_COMMENT("assert klasses not null {");
2015 Label L1, L2;
2016 __ cbnz(scratch_src_klass, L2); // it is broken if klass is NULL
2017 __ bind(L1);
2018 __ stop("broken null klass");
2019 __ bind(L2);
2020 __ load_klass(rscratch1, dst);
2021 __ cbz(rscratch1, L1); // this would be broken also
2022 BLOCK_COMMENT("} assert klasses not null done");
2023 }
2024 #endif
2055 Label L;
2056 __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2057 __ cmpw(lh, rscratch2);
2058 __ br(Assembler::GE, L);
2059 __ stop("must be a primitive array");
2060 __ bind(L);
2061 BLOCK_COMMENT("} assert primitive array done");
2062 }
2063 #endif
2064
2065 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2066 rscratch2, L_failed);
2067
2068 // TypeArrayKlass
2069 //
2070 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2071 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2072 //
2073
2074 const Register rscratch1_offset = rscratch1; // array offset
2075 const Register r18_elsize = lh; // element size
2076
2077 __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
2078 exact_log2(Klass::_lh_header_size_mask+1)); // array_offset
2079 __ add(src, src, rscratch1_offset); // src array offset
2080 __ add(dst, dst, rscratch1_offset); // dst array offset
2081 BLOCK_COMMENT("choose copy loop based on element size");
2082
2083 // next registers should be set before the jump to corresponding stub
2084 const Register from = c_rarg0; // source array address
2085 const Register to = c_rarg1; // destination array address
2086 const Register count = c_rarg2; // elements count
2087
2088 // 'from', 'to', 'count' registers should be set in such order
2089 // since they are the same as 'src', 'src_pos', 'dst'.
2090
2091 assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
2092
2093 // The possible values of elsize are 0-3, i.e. exact_log2(element
2094 // size in bytes). We do a simple bitwise binary search.
2095 __ BIND(L_copy_bytes);
2096 __ tbnz(r18_elsize, 1, L_copy_ints);
2097 __ tbnz(r18_elsize, 0, L_copy_shorts);
2098 __ lea(from, Address(src, src_pos));// src_addr
2099 __ lea(to, Address(dst, dst_pos));// dst_addr
2100 __ movw(count, scratch_length); // length
2101 __ b(RuntimeAddress(byte_copy_entry));
2102
2103 __ BIND(L_copy_shorts);
2104 __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
2105 __ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr
2106 __ movw(count, scratch_length); // length
2107 __ b(RuntimeAddress(short_copy_entry));
2108
2109 __ BIND(L_copy_ints);
2110 __ tbnz(r18_elsize, 0, L_copy_longs);
2111 __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
2112 __ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr
2113 __ movw(count, scratch_length); // length
2114 __ b(RuntimeAddress(int_copy_entry));
2115
2116 __ BIND(L_copy_longs);
2117 #ifdef ASSERT
2118 {
2119 BLOCK_COMMENT("assert long copy {");
2120 Label L;
2121 __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
2122 __ cmpw(r18_elsize, LogBytesPerLong);
2123 __ br(Assembler::EQ, L);
2124 __ stop("must be long copy, but elsize is wrong");
2125 __ bind(L);
2126 BLOCK_COMMENT("} assert long copy done");
2127 }
2128 #endif
2129 __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
2130 __ lea(to, Address(dst, dst_pos, Address::lsl(3)));// dst_addr
2131 __ movw(count, scratch_length); // length
2132 __ b(RuntimeAddress(long_copy_entry));
2133
2134 // ObjArrayKlass
2135 __ BIND(L_objArray);
2136 // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
2137
2138 Label L_plain_copy, L_checkcast_copy;
2139 // test array classes for subtyping
2140 __ load_klass(r18, dst);
2141 __ cmp(scratch_src_klass, r18); // usual case is exact equality
2142 __ br(Assembler::NE, L_checkcast_copy);
2143
2144 // Identically typed arrays can be copied without element-wise checks.
2145 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2146 rscratch2, L_failed);
2147
2148 __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2149 __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2150 __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2151 __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2152 __ movw(count, scratch_length); // length
2153 __ BIND(L_plain_copy);
2154 __ b(RuntimeAddress(oop_copy_entry));
2155
2156 __ BIND(L_checkcast_copy);
2157 // live at this point: scratch_src_klass, scratch_length, r18 (dst_klass)
2158 {
2159 // Before looking at dst.length, make sure dst is also an objArray.
2160 __ ldrw(rscratch1, Address(r18, lh_offset));
2161 __ movw(rscratch2, objArray_lh);
2162 __ eorw(rscratch1, rscratch1, rscratch2);
2163 __ cbnzw(rscratch1, L_failed);
2164
2165 // It is safe to examine both src.length and dst.length.
2166 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2167 r18, L_failed);
2168
2169 __ load_klass(dst_klass, dst); // reload
2170
2171 // Marshal the base address arguments now, freeing registers.
2172 __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2173 __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2174 __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2175 __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2176 __ movw(count, length); // length (reloaded)
2177 Register sco_temp = c_rarg3; // this register is free now
2178 assert_different_registers(from, to, count, sco_temp,
2179 dst_klass, scratch_src_klass);
2180 // assert_clean_int(count, sco_temp);
2181
2182 // Generate the type check.
2183 const int sco_offset = in_bytes(Klass::super_check_offset_offset());
2184 __ ldrw(sco_temp, Address(dst_klass, sco_offset));
2185
2186 // Smashes rscratch1, rscratch2
2187 generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
5038 frame_complete,
5039 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
5040 oop_maps, false);
5041 return stub->entry_point();
5042 }
5043
5044 class MontgomeryMultiplyGenerator : public MacroAssembler {
5045
5046 Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
5047 Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
5048
5049 RegSet _toSave;
5050 bool _squaring;
5051
5052 public:
5053 MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
5054 : MacroAssembler(as->code()), _squaring(squaring) {
5055
5056 // Register allocation
5057
5058 Register reg = c_rarg0;
5059 Pa_base = reg; // Argument registers
5060 if (squaring)
5061 Pb_base = Pa_base;
5062 else
5063 Pb_base = ++reg;
5064 Pn_base = ++reg;
5065 Rlen= ++reg;
5066 inv = ++reg;
5067 Pm_base = ++reg;
5068
5069 // Working registers:
5070 Ra = ++reg; // The current digit of a, b, n, and m.
5071 Rb = ++reg;
5072 Rm = ++reg;
5073 Rn = ++reg;
5074
5075 Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m.
5076 Pb = ++reg;
5077 Pm = ++reg;
5078 Pn = ++reg;
5079
5080 t0 = ++reg; // Three registers which form a
5081 t1 = ++reg; // triple-precision accumuator.
5082 t2 = ++reg;
5083
5084 Ri = ++reg; // Inner and outer loop indexes.
5085 Rj = ++reg;
5086
5087 Rhi_ab = ++reg; // Product registers: low and high parts
5088 Rlo_ab = ++reg; // of a*b and m*n.
5089 Rhi_mn = ++reg;
5090 Rlo_mn = ++reg;
5091
5092 // r19 and up are callee-saved.
5093 _toSave = RegSet::range(r19, reg) + Pm_base;
5094 }
5095
5096 private:
// Spill the callee-saved registers this generator uses (plus Pm_base)
// onto the stack; paired with restore_regs().
void save_regs() {
  push(_toSave, sp);
}
5100
// Reload the registers spilled by save_regs(); must mirror its RegSet.
void restore_regs() {
  pop(_toSave, sp);
}
5104
5105 template <typename T>
5106 void unroll_2(Register count, T block) {
5107 Label loop, end, odd;
5108 tbnz(count, 0, odd);
5109 cbz(count, end);
5110 align(16);
5111 bind(loop);
5112 (this->*block)();
5113 bind(odd);
|
1070
1071 // All-singing all-dancing memory copy.
1072 //
1073 // Copy count units of memory from s to d. The size of a unit is
1074 // step, which can be positive or negative depending on the direction
1075 // of copy. If is_aligned is false, we align the source address.
1076 //
1077
1078 void copy_memory(bool is_aligned, Register s, Register d,
1079 Register count, Register tmp, int step) {
1080 copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
1081 bool is_backwards = step < 0;
1082 int granularity = uabs(step);
1083 const Register t0 = r3, t1 = r4;
1084
1085 // <= 96 bytes do inline. Direction doesn't matter because we always
1086 // load all the data before writing anything
1087 Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
1088 const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
1089 const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
1090 const Register send = r17, dend = r16;
1091
1092 if (PrefetchCopyIntervalInBytes > 0)
1093 __ prfm(Address(s, 0), PLDL1KEEP);
1094 __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
1095 __ br(Assembler::HI, copy_big);
1096
1097 __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
1098 __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
1099
1100 __ cmp(count, u1(16/granularity));
1101 __ br(Assembler::LS, copy16);
1102
1103 __ cmp(count, u1(64/granularity));
1104 __ br(Assembler::HI, copy80);
1105
1106 __ cmp(count, u1(32/granularity));
1107 __ br(Assembler::LS, copy32);
1108
1109 // 33..64 bytes
1110 if (UseSIMDForMemoryOps) {
1260
1261 // We have a count of units and some trailing bytes. Adjust the
1262 // count and do a bulk copy of words.
1263 __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
1264 if (direction == copy_forwards)
1265 __ bl(copy_f);
1266 else
1267 __ bl(copy_b);
1268
1269 // And the tail.
1270 copy_memory_small(s, d, count, tmp, step);
1271
1272 if (granularity >= 8) __ bind(copy8);
1273 if (granularity >= 4) __ bind(copy4);
1274 __ bind(finish);
1275 }
1276
1277
// Debug-only helper: poison every call-clobbered register with a
// recognizable pattern so generated stubs cannot silently rely on
// stale contents.
void clobber_registers() {
#ifdef ASSERT
    // The set to poison: all call-clobbered registers except rscratch1,
    // which carries the poison value itself. (Presumably this set
    // already excludes reserved registers such as r18 — it is defined
    // by MacroAssembler, not here.)
    RegSet clobbered
      = MacroAssembler::call_clobbered_registers() - rscratch1;
    // Build 0xdeadbeefdeadbeef by duplicating the 32-bit pattern into
    // the high half.
    __ mov(rscratch1, (uint64_t)0xdeadbeef);
    __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
    // RegSetIterator yields noreg as its end sentinel.
    for (RegSetIterator it = clobbered.begin(); *it != noreg; ++it) {
      __ mov(*it, rscratch1);
    }
#endif

}
1290
1291 // Scan over array at a for count oops, verifying each one.
1292 // Preserves a and count, clobbers rscratch1 and rscratch2.
1293 void verify_oop_array (size_t size, Register a, Register count, Register temp) {
1294 Label loop, end;
1295 __ mov(rscratch1, a);
1296 __ mov(rscratch2, zr);
1297 __ bind(loop);
1298 __ cmp(rscratch2, count);
1299 __ br(Assembler::HS, end);
1300 if (size == (size_t)wordSize) {
1301 __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1302 __ verify_oop(temp);
1303 } else {
1304 __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1305 __ decode_heap_oop(temp); // calls verify_oop
1306 }
1307 __ add(rscratch2, rscratch2, size);
1308 __ b(loop);
1701 //
1702 // Output:
1703 // r0 == 0 - success
1704 // r0 == -1^K - failure, where K is partial transfer count
1705 //
1706 address generate_checkcast_copy(const char *name, address *entry,
1707 bool dest_uninitialized = false) {
1708
1709 Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
1710
1711 // Input registers (after setup_arg_regs)
1712 const Register from = c_rarg0; // source array address
1713 const Register to = c_rarg1; // destination array address
1714 const Register count = c_rarg2; // elementscount
1715 const Register ckoff = c_rarg3; // super_check_offset
1716 const Register ckval = c_rarg4; // super_klass
1717
1718 RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
1719 RegSet wb_post_saved_regs = RegSet::of(count);
1720
1721 // Registers used as temps (r19, r20, r21, r22 are save-on-entry)
1722 const Register copied_oop = r22; // actual oop copied
1723 const Register count_save = r21; // orig elementscount
1724 const Register start_to = r20; // destination array start address
1725 const Register r19_klass = r19; // oop._klass
1726
1727 //---------------------------------------------------------------
1728 // Assembler stub will be used for this call to arraycopy
1729 // if the two arrays are subtypes of Object[] but the
1730 // destination array type is not equal to or a supertype
1731 // of the source type. Each element must be separately
1732 // checked.
1733
1734 assert_different_registers(from, to, count, ckoff, ckval, start_to,
1735 copied_oop, r19_klass, count_save);
1736
1737 __ align(CodeEntryAlignment);
1738 StubCodeMark mark(this, "StubRoutines", name);
1739 address start = __ pc();
1740
1741 __ enter(); // required for proper stackwalking of RuntimeStub frame
1742
1743 #ifdef ASSERT
1744 // caller guarantees that the arrays really are different
1745 // otherwise, we would have to make conjoint checks
1746 { Label L;
1747 array_overlap_test(L, TIMES_OOP);
1748 __ stop("checkcast_copy within a single array");
1749 __ bind(L);
1750 }
1751 #endif //ASSERT
1752
1753 // Caller of this entry point must set up the argument registers.
1754 if (entry != NULL) {
1755 *entry = __ pc();
1756 BLOCK_COMMENT("Entry:");
1757 }
1758
1759 // Empty array: Nothing to do.
1760 __ cbz(count, L_done);
1761 __ push(RegSet::of(r19, r20, r21, r22), sp);
1762
1763 #ifdef ASSERT
1764 BLOCK_COMMENT("assert consistent ckoff/ckval");
1765 // The ckoff and ckval must be mutually consistent,
1766 // even though caller generates both.
1767 { Label L;
1768 int sco_offset = in_bytes(Klass::super_check_offset_offset());
1769 __ ldrw(start_to, Address(ckval, sco_offset));
1770 __ cmpw(ckoff, start_to);
1771 __ br(Assembler::EQ, L);
1772 __ stop("super_check_offset inconsistent");
1773 __ bind(L);
1774 }
1775 #endif //ASSERT
1776
1777 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
1778 bool is_oop = true;
1779 if (dest_uninitialized) {
1780 decorators |= IS_DEST_UNINITIALIZED;
1781 }
1810 __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1811 __ cbz(copied_oop, L_store_element);
1812
1813 __ load_klass(r19_klass, copied_oop);// query the object klass
1814 generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1815 // ======== end loop ========
1816
1817 // It was a real error; we must depend on the caller to finish the job.
1818 // Register count = remaining oops, count_orig = total oops.
1819 // Emit GC store barriers for the oops we have copied and report
1820 // their number to the caller.
1821
1822 __ subs(count, count_save, count); // K = partially copied oop count
1823 __ eon(count, count, zr); // report (-1^K) to caller
1824 __ br(Assembler::EQ, L_done_pop);
1825
1826 __ BIND(L_do_card_marks);
1827 bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
1828
1829 __ bind(L_done_pop);
1830 __ pop(RegSet::of(r19, r20, r21, r22), sp);
1831 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1832
1833 __ bind(L_done);
1834 __ mov(r0, count);
1835 __ leave();
1836 __ ret(lr);
1837
1838 return start;
1839 }
1840
1841 // Perform range checks on the proposed arraycopy.
1842 // Kills temp, but nothing else.
1843 // Also, clean the sign bits of src_pos and dst_pos.
1844 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
1845 Register src_pos, // source position (c_rarg1)
1846 Register dst, // destination array oo (c_rarg2)
1847 Register dst_pos, // destination position (c_rarg3)
1848 Register length,
1849 Register temp,
1850 Label& L_failed) {
1987 // (6) src and dst should be arrays.
1988 // (7) src_pos + length must not exceed length of src.
1989 // (8) dst_pos + length must not exceed length of dst.
1990 //
1991
1992 // if (src == NULL) return -1;
1993 __ cbz(src, L_failed);
1994
1995 // if (src_pos < 0) return -1;
1996 __ tbnz(src_pos, 31, L_failed); // i.e. sign bit set
1997
1998 // if (dst == NULL) return -1;
1999 __ cbz(dst, L_failed);
2000
2001 // if (dst_pos < 0) return -1;
2002 __ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set
2003
2004 // registers used as temp
2005 const Register scratch_length = r16; // elements count to copy
2006 const Register scratch_src_klass = r17; // array klass
2007 const Register lh = r15; // layout helper
2008
2009 // if (length < 0) return -1;
2010 __ movw(scratch_length, length); // length (elements count, 32-bits value)
2011 __ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set
2012
2013 __ load_klass(scratch_src_klass, src);
2014 #ifdef ASSERT
2015 // assert(src->klass() != NULL);
2016 {
2017 BLOCK_COMMENT("assert klasses not null {");
2018 Label L1, L2;
2019 __ cbnz(scratch_src_klass, L2); // it is broken if klass is NULL
2020 __ bind(L1);
2021 __ stop("broken null klass");
2022 __ bind(L2);
2023 __ load_klass(rscratch1, dst);
2024 __ cbz(rscratch1, L1); // this would be broken also
2025 BLOCK_COMMENT("} assert klasses not null done");
2026 }
2027 #endif
2058 Label L;
2059 __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2060 __ cmpw(lh, rscratch2);
2061 __ br(Assembler::GE, L);
2062 __ stop("must be a primitive array");
2063 __ bind(L);
2064 BLOCK_COMMENT("} assert primitive array done");
2065 }
2066 #endif
2067
2068 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2069 rscratch2, L_failed);
2070
2071 // TypeArrayKlass
2072 //
2073 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2074 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2075 //
2076
2077 const Register rscratch1_offset = rscratch1; // array offset
2078 const Register r15_elsize = lh; // element size
2079
2080 __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
2081 exact_log2(Klass::_lh_header_size_mask+1)); // array_offset
2082 __ add(src, src, rscratch1_offset); // src array offset
2083 __ add(dst, dst, rscratch1_offset); // dst array offset
2084 BLOCK_COMMENT("choose copy loop based on element size");
2085
2086 // next registers should be set before the jump to corresponding stub
2087 const Register from = c_rarg0; // source array address
2088 const Register to = c_rarg1; // destination array address
2089 const Register count = c_rarg2; // elements count
2090
2091 // 'from', 'to', 'count' registers should be set in such order
2092 // since they are the same as 'src', 'src_pos', 'dst'.
2093
2094 assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
2095
2096 // The possible values of elsize are 0-3, i.e. exact_log2(element
2097 // size in bytes). We do a simple bitwise binary search.
2098 __ BIND(L_copy_bytes);
2099 __ tbnz(r15_elsize, 1, L_copy_ints);
2100 __ tbnz(r15_elsize, 0, L_copy_shorts);
2101 __ lea(from, Address(src, src_pos));// src_addr
2102 __ lea(to, Address(dst, dst_pos));// dst_addr
2103 __ movw(count, scratch_length); // length
2104 __ b(RuntimeAddress(byte_copy_entry));
2105
2106 __ BIND(L_copy_shorts);
2107 __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
2108 __ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr
2109 __ movw(count, scratch_length); // length
2110 __ b(RuntimeAddress(short_copy_entry));
2111
2112 __ BIND(L_copy_ints);
2113 __ tbnz(r15_elsize, 0, L_copy_longs);
2114 __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
2115 __ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr
2116 __ movw(count, scratch_length); // length
2117 __ b(RuntimeAddress(int_copy_entry));
2118
2119 __ BIND(L_copy_longs);
2120 #ifdef ASSERT
2121 {
2122 BLOCK_COMMENT("assert long copy {");
2123 Label L;
2124 __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r15_elsize
2125 __ cmpw(r15_elsize, LogBytesPerLong);
2126 __ br(Assembler::EQ, L);
2127 __ stop("must be long copy, but elsize is wrong");
2128 __ bind(L);
2129 BLOCK_COMMENT("} assert long copy done");
2130 }
2131 #endif
2132 __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
2133 __ lea(to, Address(dst, dst_pos, Address::lsl(3)));// dst_addr
2134 __ movw(count, scratch_length); // length
2135 __ b(RuntimeAddress(long_copy_entry));
2136
2137 // ObjArrayKlass
2138 __ BIND(L_objArray);
2139 // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
2140
2141 Label L_plain_copy, L_checkcast_copy;
2142 // test array classes for subtyping
2143 __ load_klass(r15, dst);
2144 __ cmp(scratch_src_klass, r15); // usual case is exact equality
2145 __ br(Assembler::NE, L_checkcast_copy);
2146
2147 // Identically typed arrays can be copied without element-wise checks.
2148 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2149 rscratch2, L_failed);
2150
2151 __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2152 __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2153 __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2154 __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2155 __ movw(count, scratch_length); // length
2156 __ BIND(L_plain_copy);
2157 __ b(RuntimeAddress(oop_copy_entry));
2158
2159 __ BIND(L_checkcast_copy);
2160 // live at this point: scratch_src_klass, scratch_length, r15 (dst_klass)
2161 {
2162 // Before looking at dst.length, make sure dst is also an objArray.
2163 __ ldrw(rscratch1, Address(r15, lh_offset));
2164 __ movw(rscratch2, objArray_lh);
2165 __ eorw(rscratch1, rscratch1, rscratch2);
2166 __ cbnzw(rscratch1, L_failed);
2167
2168 // It is safe to examine both src.length and dst.length.
2169 arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2170 r15, L_failed);
2171
2172 __ load_klass(dst_klass, dst); // reload
2173
2174 // Marshal the base address arguments now, freeing registers.
2175 __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2176 __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2177 __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2178 __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2179 __ movw(count, length); // length (reloaded)
2180 Register sco_temp = c_rarg3; // this register is free now
2181 assert_different_registers(from, to, count, sco_temp,
2182 dst_klass, scratch_src_klass);
2183 // assert_clean_int(count, sco_temp);
2184
2185 // Generate the type check.
2186 const int sco_offset = in_bytes(Klass::super_check_offset_offset());
2187 __ ldrw(sco_temp, Address(dst_klass, sco_offset));
2188
2189 // Smashes rscratch1, rscratch2
2190 generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
5041 frame_complete,
5042 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
5043 oop_maps, false);
5044 return stub->entry_point();
5045 }
5046
5047 class MontgomeryMultiplyGenerator : public MacroAssembler {
5048
5049 Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
5050 Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
5051
5052 RegSet _toSave;
5053 bool _squaring;
5054
5055 public:
5056 MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
5057 : MacroAssembler(as->code()), _squaring(squaring) {
5058
5059 // Register allocation
5060
5061 RegSetIterator regs = (RegSet::range(r0, r26) - r18_tls).begin();
5062 Pa_base = *regs; // Argument registers
5063 if (squaring)
5064 Pb_base = Pa_base;
5065 else
5066 Pb_base = *++regs;
5067 Pn_base = *++regs;
5068 Rlen= *++regs;
5069 inv = *++regs;
5070 Pm_base = *++regs;
5071
5072 // Working registers:
5073 Ra = *++regs; // The current digit of a, b, n, and m.
5074 Rb = *++regs;
5075 Rm = *++regs;
5076 Rn = *++regs;
5077
5078 Pa = *++regs; // Pointers to the current/next digit of a, b, n, and m.
5079 Pb = *++regs;
5080 Pm = *++regs;
5081 Pn = *++regs;
5082
5083 t0 = *++regs; // Three registers which form a
5084 t1 = *++regs; // triple-precision accumuator.
5085 t2 = *++regs;
5086
5087 Ri = *++regs; // Inner and outer loop indexes.
5088 Rj = *++regs;
5089
5090 Rhi_ab = *++regs; // Product registers: low and high parts
5091 Rlo_ab = *++regs; // of a*b and m*n.
5092 Rhi_mn = *++regs;
5093 Rlo_mn = *++regs;
5094
5095 // r19 and up are callee-saved.
5096 _toSave = RegSet::range(r19, *regs) + Pm_base;
5097 }
5098
5099 private:
// Spill the callee-saved registers this generator uses (plus Pm_base)
// onto the stack; paired with restore_regs().
void save_regs() {
  push(_toSave, sp);
}
5103
// Reload the registers spilled by save_regs(); must mirror its RegSet.
void restore_regs() {
  pop(_toSave, sp);
}
5107
5108 template <typename T>
5109 void unroll_2(Register count, T block) {
5110 Label loop, end, odd;
5111 tbnz(count, 0, odd);
5112 cbz(count, end);
5113 align(16);
5114 bind(loop);
5115 (this->*block)();
5116 bind(odd);
|