< prev index next >

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

Print this page
rev 60623 : 8248500: AArch64: Remove the r18 dependency on Windows AArch64
Reviewed-by:
Contributed-by: mbeckwit, luhenry, burban


1070 
1071   // All-singing all-dancing memory copy.
1072   //
1073   // Copy count units of memory from s to d.  The size of a unit is
1074   // step, which can be positive or negative depending on the direction
1075   // of copy.  If is_aligned is false, we align the source address.
1076   //
1077 
1078   void copy_memory(bool is_aligned, Register s, Register d,
1079                    Register count, Register tmp, int step) {
1080     copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
1081     bool is_backwards = step < 0;
1082     int granularity = uabs(step);
1083     const Register t0 = r3, t1 = r4;
1084 
1085     // <= 96 bytes do inline. Direction doesn't matter because we always
1086     // load all the data before writing anything
1087     Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
1088     const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
1089     const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
1090     const Register send = r17, dend = r18;
1091 
1092     if (PrefetchCopyIntervalInBytes > 0)
1093       __ prfm(Address(s, 0), PLDL1KEEP);
1094     __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
1095     __ br(Assembler::HI, copy_big);
1096 
1097     __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
1098     __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
1099 
1100     __ cmp(count, u1(16/granularity));
1101     __ br(Assembler::LS, copy16);
1102 
1103     __ cmp(count, u1(64/granularity));
1104     __ br(Assembler::HI, copy80);
1105 
1106     __ cmp(count, u1(32/granularity));
1107     __ br(Assembler::LS, copy32);
1108 
1109     // 33..64 bytes
1110     if (UseSIMDForMemoryOps) {


1260 
1261     // We have a count of units and some trailing bytes.  Adjust the
1262     // count and do a bulk copy of words.
1263     __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
1264     if (direction == copy_forwards)
1265       __ bl(copy_f);
1266     else
1267       __ bl(copy_b);
1268 
1269     // And the tail.
1270     copy_memory_small(s, d, count, tmp, step);
1271 
1272     if (granularity >= 8) __ bind(copy8);
1273     if (granularity >= 4) __ bind(copy4);
1274     __ bind(finish);
1275   }
1276 
1277 
// Debug-only helper: poison the temp/scratch registers with the pattern
// 0xdeadbeefdeadbeef so stubs cannot silently rely on stale register
// contents left over from a previous call.
// NOTE(review): the loop runs r3..r18 inclusive, so it clobbers r18, which
// is the reserved platform register on Windows/Darwin AArch64 (AAPCS64);
// that is exactly what JDK-8248500 changes in the new version of this hunk.
1278   void clobber_registers() {
1279 #ifdef ASSERT


1280     __ mov(rscratch1, (uint64_t)0xdeadbeef);
1281     __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
1282     for (Register r = r3; r <= r18; r++)
1283       if (r != rscratch1) __ mov(r, rscratch1);

1284 #endif

1285   }
1286 
1287   // Scan over array at a for count oops, verifying each one.
1288   // Preserves a and count, clobbers rscratch1 and rscratch2.
1289   void verify_oop_array (size_t size, Register a, Register count, Register temp) {
1290     Label loop, end;
1291     __ mov(rscratch1, a);
1292     __ mov(rscratch2, zr);
1293     __ bind(loop);
1294     __ cmp(rscratch2, count);
1295     __ br(Assembler::HS, end);
1296     if (size == (size_t)wordSize) {
1297       __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1298       __ verify_oop(temp);
1299     } else {
1300       __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1301       __ decode_heap_oop(temp); // calls verify_oop
1302     }
1303     __ add(rscratch2, rscratch2, size);
1304     __ b(loop);


1697   //
1698   //  Output:
1699   //    r0 ==  0  -  success
1700   //    r0 == -1^K - failure, where K is partial transfer count
1701   //
1702   address generate_checkcast_copy(const char *name, address *entry,
1703                                   bool dest_uninitialized = false) {
1704 
1705     Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
1706 
1707     // Input registers (after setup_arg_regs)
1708     const Register from        = c_rarg0;   // source array address
1709     const Register to          = c_rarg1;   // destination array address
1710     const Register count       = c_rarg2;   // elementscount
1711     const Register ckoff       = c_rarg3;   // super_check_offset
1712     const Register ckval       = c_rarg4;   // super_klass
1713 
1714     RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
1715     RegSet wb_post_saved_regs = RegSet::of(count);
1716 
1717     // Registers used as temps (r18, r19, r20 are save-on-entry)

1718     const Register count_save  = r21;       // orig elementscount
1719     const Register start_to    = r20;       // destination array start address
1720     const Register copied_oop  = r18;       // actual oop copied
1721     const Register r19_klass   = r19;       // oop._klass
1722 
1723     //---------------------------------------------------------------
1724     // Assembler stub will be used for this call to arraycopy
1725     // if the two arrays are subtypes of Object[] but the
1726     // destination array type is not equal to or a supertype
1727     // of the source type.  Each element must be separately
1728     // checked.
1729 
1730     assert_different_registers(from, to, count, ckoff, ckval, start_to,
1731                                copied_oop, r19_klass, count_save);
1732 
1733     __ align(CodeEntryAlignment);
1734     StubCodeMark mark(this, "StubRoutines", name);
1735     address start = __ pc();
1736 
1737     __ enter(); // required for proper stackwalking of RuntimeStub frame
1738 
1739 #ifdef ASSERT
1740     // caller guarantees that the arrays really are different
1741     // otherwise, we would have to make conjoint checks
1742     { Label L;
1743       array_overlap_test(L, TIMES_OOP);
1744       __ stop("checkcast_copy within a single array");
1745       __ bind(L);
1746     }
1747 #endif //ASSERT
1748 
1749     // Caller of this entry point must set up the argument registers.
1750     if (entry != NULL) {
1751       *entry = __ pc();
1752       BLOCK_COMMENT("Entry:");
1753     }
1754 
1755      // Empty array:  Nothing to do.
1756     __ cbz(count, L_done);
1757 
1758     __ push(RegSet::of(r18, r19, r20, r21), sp);
1759 
1760 #ifdef ASSERT
1761     BLOCK_COMMENT("assert consistent ckoff/ckval");
1762     // The ckoff and ckval must be mutually consistent,
1763     // even though caller generates both.
1764     { Label L;
1765       int sco_offset = in_bytes(Klass::super_check_offset_offset());
1766       __ ldrw(start_to, Address(ckval, sco_offset));
1767       __ cmpw(ckoff, start_to);
1768       __ br(Assembler::EQ, L);
1769       __ stop("super_check_offset inconsistent");
1770       __ bind(L);
1771     }
1772 #endif //ASSERT
1773 
1774     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
1775     bool is_oop = true;
1776     if (dest_uninitialized) {
1777       decorators |= IS_DEST_UNINITIALIZED;
1778     }


1807     __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1808     __ cbz(copied_oop, L_store_element);
1809 
1810     __ load_klass(r19_klass, copied_oop);// query the object klass
1811     generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1812     // ======== end loop ========
1813 
1814     // It was a real error; we must depend on the caller to finish the job.
1815     // Register count = remaining oops, count_orig = total oops.
1816     // Emit GC store barriers for the oops we have copied and report
1817     // their number to the caller.
1818 
1819     __ subs(count, count_save, count);     // K = partially copied oop count
1820     __ eon(count, count, zr);                   // report (-1^K) to caller
1821     __ br(Assembler::EQ, L_done_pop);
1822 
1823     __ BIND(L_do_card_marks);
1824     bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
1825 
1826     __ bind(L_done_pop);
1827     __ pop(RegSet::of(r18, r19, r20, r21), sp);
1828     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1829 
1830     __ bind(L_done);
1831     __ mov(r0, count);
1832     __ leave();
1833     __ ret(lr);
1834 
1835     return start;
1836   }
1837 
1838   // Perform range checks on the proposed arraycopy.
1839   // Kills temp, but nothing else.
1840   // Also, clean the sign bits of src_pos and dst_pos.
1841   void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
1842                               Register src_pos, // source position (c_rarg1)
1843                               Register dst,     // destination array oo (c_rarg2)
1844                               Register dst_pos, // destination position (c_rarg3)
1845                               Register length,
1846                               Register temp,
1847                               Label& L_failed) {


1984     // (6) src and dst should be arrays.
1985     // (7) src_pos + length must not exceed length of src.
1986     // (8) dst_pos + length must not exceed length of dst.
1987     //
1988 
1989     //  if (src == NULL) return -1;
1990     __ cbz(src, L_failed);
1991 
1992     //  if (src_pos < 0) return -1;
1993     __ tbnz(src_pos, 31, L_failed);  // i.e. sign bit set
1994 
1995     //  if (dst == NULL) return -1;
1996     __ cbz(dst, L_failed);
1997 
1998     //  if (dst_pos < 0) return -1;
1999     __ tbnz(dst_pos, 31, L_failed);  // i.e. sign bit set
2000 
2001     // registers used as temp
2002     const Register scratch_length    = r16; // elements count to copy
2003     const Register scratch_src_klass = r17; // array klass
2004     const Register lh                = r18; // layout helper
2005 
2006     //  if (length < 0) return -1;
2007     __ movw(scratch_length, length);        // length (elements count, 32-bits value)
2008     __ tbnz(scratch_length, 31, L_failed);  // i.e. sign bit set
2009 
2010     __ load_klass(scratch_src_klass, src);
2011 #ifdef ASSERT
2012     //  assert(src->klass() != NULL);
2013     {
2014       BLOCK_COMMENT("assert klasses not null {");
2015       Label L1, L2;
2016       __ cbnz(scratch_src_klass, L2);   // it is broken if klass is NULL
2017       __ bind(L1);
2018       __ stop("broken null klass");
2019       __ bind(L2);
2020       __ load_klass(rscratch1, dst);
2021       __ cbz(rscratch1, L1);     // this would be broken also
2022       BLOCK_COMMENT("} assert klasses not null done");
2023     }
2024 #endif


2055       Label L;
2056       __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2057       __ cmpw(lh, rscratch2);
2058       __ br(Assembler::GE, L);
2059       __ stop("must be a primitive array");
2060       __ bind(L);
2061       BLOCK_COMMENT("} assert primitive array done");
2062     }
2063 #endif
2064 
2065     arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2066                            rscratch2, L_failed);
2067 
2068     // TypeArrayKlass
2069     //
2070     // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2071     // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2072     //
2073 
2074     const Register rscratch1_offset = rscratch1;    // array offset
2075     const Register r18_elsize = lh; // element size
2076 
2077     __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
2078            exact_log2(Klass::_lh_header_size_mask+1));   // array_offset
2079     __ add(src, src, rscratch1_offset);           // src array offset
2080     __ add(dst, dst, rscratch1_offset);           // dst array offset
2081     BLOCK_COMMENT("choose copy loop based on element size");
2082 
2083     // next registers should be set before the jump to corresponding stub
2084     const Register from     = c_rarg0;  // source array address
2085     const Register to       = c_rarg1;  // destination array address
2086     const Register count    = c_rarg2;  // elements count
2087 
2088     // 'from', 'to', 'count' registers should be set in such order
2089     // since they are the same as 'src', 'src_pos', 'dst'.
2090 
2091     assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
2092 
2093     // The possible values of elsize are 0-3, i.e. exact_log2(element
2094     // size in bytes).  We do a simple bitwise binary search.
2095   __ BIND(L_copy_bytes);
2096     __ tbnz(r18_elsize, 1, L_copy_ints);
2097     __ tbnz(r18_elsize, 0, L_copy_shorts);
2098     __ lea(from, Address(src, src_pos));// src_addr
2099     __ lea(to,   Address(dst, dst_pos));// dst_addr
2100     __ movw(count, scratch_length); // length
2101     __ b(RuntimeAddress(byte_copy_entry));
2102 
2103   __ BIND(L_copy_shorts);
2104     __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
2105     __ lea(to,   Address(dst, dst_pos, Address::lsl(1)));// dst_addr
2106     __ movw(count, scratch_length); // length
2107     __ b(RuntimeAddress(short_copy_entry));
2108 
2109   __ BIND(L_copy_ints);
2110     __ tbnz(r18_elsize, 0, L_copy_longs);
2111     __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
2112     __ lea(to,   Address(dst, dst_pos, Address::lsl(2)));// dst_addr
2113     __ movw(count, scratch_length); // length
2114     __ b(RuntimeAddress(int_copy_entry));
2115 
2116   __ BIND(L_copy_longs);
2117 #ifdef ASSERT
2118     {
2119       BLOCK_COMMENT("assert long copy {");
2120       Label L;
2121       __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
2122       __ cmpw(r18_elsize, LogBytesPerLong);
2123       __ br(Assembler::EQ, L);
2124       __ stop("must be long copy, but elsize is wrong");
2125       __ bind(L);
2126       BLOCK_COMMENT("} assert long copy done");
2127     }
2128 #endif
2129     __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
2130     __ lea(to,   Address(dst, dst_pos, Address::lsl(3)));// dst_addr
2131     __ movw(count, scratch_length); // length
2132     __ b(RuntimeAddress(long_copy_entry));
2133 
2134     // ObjArrayKlass
2135   __ BIND(L_objArray);
2136     // live at this point:  scratch_src_klass, scratch_length, src[_pos], dst[_pos]
2137 
2138     Label L_plain_copy, L_checkcast_copy;
2139     //  test array classes for subtyping
2140     __ load_klass(r18, dst);
2141     __ cmp(scratch_src_klass, r18); // usual case is exact equality
2142     __ br(Assembler::NE, L_checkcast_copy);
2143 
2144     // Identically typed arrays can be copied without element-wise checks.
2145     arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2146                            rscratch2, L_failed);
2147 
2148     __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2149     __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2150     __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2151     __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2152     __ movw(count, scratch_length); // length
2153   __ BIND(L_plain_copy);
2154     __ b(RuntimeAddress(oop_copy_entry));
2155 
2156   __ BIND(L_checkcast_copy);
2157     // live at this point:  scratch_src_klass, scratch_length, r18 (dst_klass)
2158     {
2159       // Before looking at dst.length, make sure dst is also an objArray.
2160       __ ldrw(rscratch1, Address(r18, lh_offset));
2161       __ movw(rscratch2, objArray_lh);
2162       __ eorw(rscratch1, rscratch1, rscratch2);
2163       __ cbnzw(rscratch1, L_failed);
2164 
2165       // It is safe to examine both src.length and dst.length.
2166       arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2167                              r18, L_failed);
2168 
2169       __ load_klass(dst_klass, dst); // reload
2170 
2171       // Marshal the base address arguments now, freeing registers.
2172       __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2173       __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2174       __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2175       __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2176       __ movw(count, length);           // length (reloaded)
2177       Register sco_temp = c_rarg3;      // this register is free now
2178       assert_different_registers(from, to, count, sco_temp,
2179                                  dst_klass, scratch_src_klass);
2180       // assert_clean_int(count, sco_temp);
2181 
2182       // Generate the type check.
2183       const int sco_offset = in_bytes(Klass::super_check_offset_offset());
2184       __ ldrw(sco_temp, Address(dst_klass, sco_offset));
2185 
2186       // Smashes rscratch1, rscratch2
2187       generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);


5038                                     frame_complete,
5039                                     (framesize >> (LogBytesPerWord - LogBytesPerInt)),
5040                                     oop_maps, false);
5041     return stub->entry_point();
5042   }
5043 
5044   class MontgomeryMultiplyGenerator : public MacroAssembler {
5045 
5046     Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
5047       Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
5048 
5049     RegSet _toSave;
5050     bool _squaring;
5051 
5052   public:
    // Allocate the working register set for the Montgomery multiply/square
    // intrinsic.  Registers are handed out sequentially starting at c_rarg0;
    // when squaring, Pb_base aliases Pa_base (b == a), so one register is
    // saved.  Everything at r19 and above is callee-saved and is recorded in
    // _toSave so save_regs()/restore_regs() can preserve it around the stub.
5053     MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
5054       : MacroAssembler(as->code()), _squaring(squaring) {
5055 
5056       // Register allocation
5057 
5058       Register reg = c_rarg0;
5059       Pa_base = reg;       // Argument registers
5060       if (squaring)
5061         Pb_base = Pa_base;
5062       else
5063         Pb_base = ++reg;
5064       Pn_base = ++reg;
5065       Rlen= ++reg;
5066       inv = ++reg;
5067       Pm_base = ++reg;
5068 
5069                           // Working registers:
5070       Ra =  ++reg;        // The current digit of a, b, n, and m.
5071       Rb =  ++reg;
5072       Rm =  ++reg;
5073       Rn =  ++reg;
5074 
5075       Pa =  ++reg;        // Pointers to the current/next digit of a, b, n, and m.
5076       Pb =  ++reg;
5077       Pm =  ++reg;
5078       Pn =  ++reg;
5079 
5080       t0 =  ++reg;        // Three registers which form a
5081       t1 =  ++reg;        // triple-precision accumulator.
5082       t2 =  ++reg;
5083 
5084       Ri =  ++reg;        // Inner and outer loop indexes.
5085       Rj =  ++reg;
5086 
5087       Rhi_ab = ++reg;     // Product registers: low and high parts
5088       Rlo_ab = ++reg;     // of a*b and m*n.
5089       Rhi_mn = ++reg;
5090       Rlo_mn = ++reg;
5091 
5092       // r19 and up are callee-saved.
5093       _toSave = RegSet::range(r19, reg) + Pm_base;
5094     }
5095 
5096   private:
    // Spill the callee-saved registers claimed in the constructor (plus
    // Pm_base) to the stack; paired with restore_regs().
5097     void save_regs() {
5098       push(_toSave, sp);
5099     }
5100 
    // Reload the registers spilled by save_regs(); must mirror it exactly.
5101     void restore_regs() {
5102       pop(_toSave, sp);
5103     }
5104 
5105     template <typename T>
5106     void unroll_2(Register count, T block) {
5107       Label loop, end, odd;
5108       tbnz(count, 0, odd);
5109       cbz(count, end);
5110       align(16);
5111       bind(loop);
5112       (this->*block)();
5113       bind(odd);




1070 
1071   // All-singing all-dancing memory copy.
1072   //
1073   // Copy count units of memory from s to d.  The size of a unit is
1074   // step, which can be positive or negative depending on the direction
1075   // of copy.  If is_aligned is false, we align the source address.
1076   //
1077 
1078   void copy_memory(bool is_aligned, Register s, Register d,
1079                    Register count, Register tmp, int step) {
1080     copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
1081     bool is_backwards = step < 0;
1082     int granularity = uabs(step);
1083     const Register t0 = r3, t1 = r4;
1084 
1085     // <= 96 bytes do inline. Direction doesn't matter because we always
1086     // load all the data before writing anything
1087     Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
1088     const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
1089     const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
1090     const Register send = r17, dend = r16;
1091 
1092     if (PrefetchCopyIntervalInBytes > 0)
1093       __ prfm(Address(s, 0), PLDL1KEEP);
1094     __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
1095     __ br(Assembler::HI, copy_big);
1096 
1097     __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
1098     __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
1099 
1100     __ cmp(count, u1(16/granularity));
1101     __ br(Assembler::LS, copy16);
1102 
1103     __ cmp(count, u1(64/granularity));
1104     __ br(Assembler::HI, copy80);
1105 
1106     __ cmp(count, u1(32/granularity));
1107     __ br(Assembler::LS, copy32);
1108 
1109     // 33..64 bytes
1110     if (UseSIMDForMemoryOps) {


1260 
1261     // We have a count of units and some trailing bytes.  Adjust the
1262     // count and do a bulk copy of words.
1263     __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
1264     if (direction == copy_forwards)
1265       __ bl(copy_f);
1266     else
1267       __ bl(copy_b);
1268 
1269     // And the tail.
1270     copy_memory_small(s, d, count, tmp, step);
1271 
1272     if (granularity >= 8) __ bind(copy8);
1273     if (granularity >= 4) __ bind(copy4);
1274     __ bind(finish);
1275   }
1276 
1277 
// Debug-only helper: poison the call-clobbered registers with the pattern
// 0xdeadbeefdeadbeef so stubs cannot silently rely on stale register
// contents left over from a previous call.
// The set comes from MacroAssembler::call_clobbered_registers(), which (per
// this changeset) excludes r18 on platforms where AAPCS64 reserves it as
// the platform register; rscratch1 is removed because it holds the poison
// value itself.
1278   void clobber_registers() {
1279 #ifdef ASSERT
1280     RegSet clobbered
1281       = MacroAssembler::call_clobbered_registers() - rscratch1;
1282     __ mov(rscratch1, (uint64_t)0xdeadbeef);
1283     __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
1284     for (RegSetIterator it = clobbered.begin(); *it != noreg; ++it) {
1285       __ mov(*it, rscratch1);
1286     }
1287 #endif
1288 
1289   }
1290 
1291   // Scan over array at a for count oops, verifying each one.
1292   // Preserves a and count, clobbers rscratch1 and rscratch2.
1293   void verify_oop_array (size_t size, Register a, Register count, Register temp) {
1294     Label loop, end;
1295     __ mov(rscratch1, a);
1296     __ mov(rscratch2, zr);
1297     __ bind(loop);
1298     __ cmp(rscratch2, count);
1299     __ br(Assembler::HS, end);
1300     if (size == (size_t)wordSize) {
1301       __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1302       __ verify_oop(temp);
1303     } else {
1304       __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
1305       __ decode_heap_oop(temp); // calls verify_oop
1306     }
1307     __ add(rscratch2, rscratch2, size);
1308     __ b(loop);


1701   //
1702   //  Output:
1703   //    r0 ==  0  -  success
1704   //    r0 == -1^K - failure, where K is partial transfer count
1705   //
1706   address generate_checkcast_copy(const char *name, address *entry,
1707                                   bool dest_uninitialized = false) {
1708 
1709     Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
1710 
1711     // Input registers (after setup_arg_regs)
1712     const Register from        = c_rarg0;   // source array address
1713     const Register to          = c_rarg1;   // destination array address
1714     const Register count       = c_rarg2;   // elementscount
1715     const Register ckoff       = c_rarg3;   // super_check_offset
1716     const Register ckval       = c_rarg4;   // super_klass
1717 
1718     RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
1719     RegSet wb_post_saved_regs = RegSet::of(count);
1720 
1721     // Registers used as temps (r19, r20, r21, r22 are save-on-entry)
1722     const Register copied_oop  = r22;       // actual oop copied
1723     const Register count_save  = r21;       // orig elementscount
1724     const Register start_to    = r20;       // destination array start address

1725     const Register r19_klass   = r19;       // oop._klass
1726 
1727     //---------------------------------------------------------------
1728     // Assembler stub will be used for this call to arraycopy
1729     // if the two arrays are subtypes of Object[] but the
1730     // destination array type is not equal to or a supertype
1731     // of the source type.  Each element must be separately
1732     // checked.
1733 
1734     assert_different_registers(from, to, count, ckoff, ckval, start_to,
1735                                copied_oop, r19_klass, count_save);
1736 
1737     __ align(CodeEntryAlignment);
1738     StubCodeMark mark(this, "StubRoutines", name);
1739     address start = __ pc();
1740 
1741     __ enter(); // required for proper stackwalking of RuntimeStub frame
1742 
1743 #ifdef ASSERT
1744     // caller guarantees that the arrays really are different
1745     // otherwise, we would have to make conjoint checks
1746     { Label L;
1747       array_overlap_test(L, TIMES_OOP);
1748       __ stop("checkcast_copy within a single array");
1749       __ bind(L);
1750     }
1751 #endif //ASSERT
1752 
1753     // Caller of this entry point must set up the argument registers.
1754     if (entry != NULL) {
1755       *entry = __ pc();
1756       BLOCK_COMMENT("Entry:");
1757     }
1758 
1759      // Empty array:  Nothing to do.
1760     __ cbz(count, L_done);
1761     __ push(RegSet::of(r19, r20, r21, r22), sp);

1762 
1763 #ifdef ASSERT
1764     BLOCK_COMMENT("assert consistent ckoff/ckval");
1765     // The ckoff and ckval must be mutually consistent,
1766     // even though caller generates both.
1767     { Label L;
1768       int sco_offset = in_bytes(Klass::super_check_offset_offset());
1769       __ ldrw(start_to, Address(ckval, sco_offset));
1770       __ cmpw(ckoff, start_to);
1771       __ br(Assembler::EQ, L);
1772       __ stop("super_check_offset inconsistent");
1773       __ bind(L);
1774     }
1775 #endif //ASSERT
1776 
1777     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
1778     bool is_oop = true;
1779     if (dest_uninitialized) {
1780       decorators |= IS_DEST_UNINITIALIZED;
1781     }


1810     __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
1811     __ cbz(copied_oop, L_store_element);
1812 
1813     __ load_klass(r19_klass, copied_oop);// query the object klass
1814     generate_type_check(r19_klass, ckoff, ckval, L_store_element);
1815     // ======== end loop ========
1816 
1817     // It was a real error; we must depend on the caller to finish the job.
1818     // Register count = remaining oops, count_orig = total oops.
1819     // Emit GC store barriers for the oops we have copied and report
1820     // their number to the caller.
1821 
1822     __ subs(count, count_save, count);     // K = partially copied oop count
1823     __ eon(count, count, zr);                   // report (-1^K) to caller
1824     __ br(Assembler::EQ, L_done_pop);
1825 
1826     __ BIND(L_do_card_marks);
1827     bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, rscratch1, wb_post_saved_regs);
1828 
1829     __ bind(L_done_pop);
1830     __ pop(RegSet::of(r19, r20, r21, r22), sp);
1831     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
1832 
1833     __ bind(L_done);
1834     __ mov(r0, count);
1835     __ leave();
1836     __ ret(lr);
1837 
1838     return start;
1839   }
1840 
1841   // Perform range checks on the proposed arraycopy.
1842   // Kills temp, but nothing else.
1843   // Also, clean the sign bits of src_pos and dst_pos.
1844   void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
1845                               Register src_pos, // source position (c_rarg1)
1846                               Register dst,     // destination array oo (c_rarg2)
1847                               Register dst_pos, // destination position (c_rarg3)
1848                               Register length,
1849                               Register temp,
1850                               Label& L_failed) {


1987     // (6) src and dst should be arrays.
1988     // (7) src_pos + length must not exceed length of src.
1989     // (8) dst_pos + length must not exceed length of dst.
1990     //
1991 
1992     //  if (src == NULL) return -1;
1993     __ cbz(src, L_failed);
1994 
1995     //  if (src_pos < 0) return -1;
1996     __ tbnz(src_pos, 31, L_failed);  // i.e. sign bit set
1997 
1998     //  if (dst == NULL) return -1;
1999     __ cbz(dst, L_failed);
2000 
2001     //  if (dst_pos < 0) return -1;
2002     __ tbnz(dst_pos, 31, L_failed);  // i.e. sign bit set
2003 
2004     // registers used as temp
2005     const Register scratch_length    = r16; // elements count to copy
2006     const Register scratch_src_klass = r17; // array klass
2007     const Register lh                = r15; // layout helper
2008 
2009     //  if (length < 0) return -1;
2010     __ movw(scratch_length, length);        // length (elements count, 32-bits value)
2011     __ tbnz(scratch_length, 31, L_failed);  // i.e. sign bit set
2012 
2013     __ load_klass(scratch_src_klass, src);
2014 #ifdef ASSERT
2015     //  assert(src->klass() != NULL);
2016     {
2017       BLOCK_COMMENT("assert klasses not null {");
2018       Label L1, L2;
2019       __ cbnz(scratch_src_klass, L2);   // it is broken if klass is NULL
2020       __ bind(L1);
2021       __ stop("broken null klass");
2022       __ bind(L2);
2023       __ load_klass(rscratch1, dst);
2024       __ cbz(rscratch1, L1);     // this would be broken also
2025       BLOCK_COMMENT("} assert klasses not null done");
2026     }
2027 #endif


2058       Label L;
2059       __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2060       __ cmpw(lh, rscratch2);
2061       __ br(Assembler::GE, L);
2062       __ stop("must be a primitive array");
2063       __ bind(L);
2064       BLOCK_COMMENT("} assert primitive array done");
2065     }
2066 #endif
2067 
2068     arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2069                            rscratch2, L_failed);
2070 
2071     // TypeArrayKlass
2072     //
2073     // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2074     // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2075     //
2076 
2077     const Register rscratch1_offset = rscratch1;    // array offset
2078     const Register r15_elsize = lh; // element size
2079 
2080     __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
2081            exact_log2(Klass::_lh_header_size_mask+1));   // array_offset
2082     __ add(src, src, rscratch1_offset);           // src array offset
2083     __ add(dst, dst, rscratch1_offset);           // dst array offset
2084     BLOCK_COMMENT("choose copy loop based on element size");
2085 
2086     // next registers should be set before the jump to corresponding stub
2087     const Register from     = c_rarg0;  // source array address
2088     const Register to       = c_rarg1;  // destination array address
2089     const Register count    = c_rarg2;  // elements count
2090 
2091     // 'from', 'to', 'count' registers should be set in such order
2092     // since they are the same as 'src', 'src_pos', 'dst'.
2093 
2094     assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
2095 
2096     // The possible values of elsize are 0-3, i.e. exact_log2(element
2097     // size in bytes).  We do a simple bitwise binary search.
2098   __ BIND(L_copy_bytes);
2099     __ tbnz(r15_elsize, 1, L_copy_ints);
2100     __ tbnz(r15_elsize, 0, L_copy_shorts);
2101     __ lea(from, Address(src, src_pos));// src_addr
2102     __ lea(to,   Address(dst, dst_pos));// dst_addr
2103     __ movw(count, scratch_length); // length
2104     __ b(RuntimeAddress(byte_copy_entry));
2105 
2106   __ BIND(L_copy_shorts);
2107     __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
2108     __ lea(to,   Address(dst, dst_pos, Address::lsl(1)));// dst_addr
2109     __ movw(count, scratch_length); // length
2110     __ b(RuntimeAddress(short_copy_entry));
2111 
2112   __ BIND(L_copy_ints);
2113     __ tbnz(r15_elsize, 0, L_copy_longs);
2114     __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
2115     __ lea(to,   Address(dst, dst_pos, Address::lsl(2)));// dst_addr
2116     __ movw(count, scratch_length); // length
2117     __ b(RuntimeAddress(int_copy_entry));
2118 
2119   __ BIND(L_copy_longs);
2120 #ifdef ASSERT
2121     {
2122       BLOCK_COMMENT("assert long copy {");
2123       Label L;
2124       __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r15_elsize
2125       __ cmpw(r15_elsize, LogBytesPerLong);
2126       __ br(Assembler::EQ, L);
2127       __ stop("must be long copy, but elsize is wrong");
2128       __ bind(L);
2129       BLOCK_COMMENT("} assert long copy done");
2130     }
2131 #endif
2132     __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
2133     __ lea(to,   Address(dst, dst_pos, Address::lsl(3)));// dst_addr
2134     __ movw(count, scratch_length); // length
2135     __ b(RuntimeAddress(long_copy_entry));
2136 
2137     // ObjArrayKlass
2138   __ BIND(L_objArray);
2139     // live at this point:  scratch_src_klass, scratch_length, src[_pos], dst[_pos]
2140 
2141     Label L_plain_copy, L_checkcast_copy;
2142     //  test array classes for subtyping
2143     __ load_klass(r15, dst);
2144     __ cmp(scratch_src_klass, r15); // usual case is exact equality
2145     __ br(Assembler::NE, L_checkcast_copy);
2146 
2147     // Identically typed arrays can be copied without element-wise checks.
2148     arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2149                            rscratch2, L_failed);
2150 
2151     __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2152     __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2153     __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2154     __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2155     __ movw(count, scratch_length); // length
2156   __ BIND(L_plain_copy);
2157     __ b(RuntimeAddress(oop_copy_entry));
2158 
2159   __ BIND(L_checkcast_copy);
2160     // live at this point:  scratch_src_klass, scratch_length, r15 (dst_klass)
2161     {
2162       // Before looking at dst.length, make sure dst is also an objArray.
2163       __ ldrw(rscratch1, Address(r15, lh_offset));
2164       __ movw(rscratch2, objArray_lh);
2165       __ eorw(rscratch1, rscratch1, rscratch2);
2166       __ cbnzw(rscratch1, L_failed);
2167 
2168       // It is safe to examine both src.length and dst.length.
2169       arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
2170                              r15, L_failed);
2171 
2172       __ load_klass(dst_klass, dst); // reload
2173 
2174       // Marshal the base address arguments now, freeing registers.
2175       __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
2176       __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2177       __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
2178       __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2179       __ movw(count, length);           // length (reloaded)
2180       Register sco_temp = c_rarg3;      // this register is free now
2181       assert_different_registers(from, to, count, sco_temp,
2182                                  dst_klass, scratch_src_klass);
2183       // assert_clean_int(count, sco_temp);
2184 
2185       // Generate the type check.
2186       const int sco_offset = in_bytes(Klass::super_check_offset_offset());
2187       __ ldrw(sco_temp, Address(dst_klass, sco_offset));
2188 
2189       // Smashes rscratch1, rscratch2
2190       generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);


5041                                     frame_complete,
5042                                     (framesize >> (LogBytesPerWord - LogBytesPerInt)),
5043                                     oop_maps, false);
5044     return stub->entry_point();
5045   }
5046 
5047   class MontgomeryMultiplyGenerator : public MacroAssembler {
5048 
5049     Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
5050       Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
5051 
5052     RegSet _toSave;
5053     bool _squaring;
5054 
5055   public:
5056     MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
5057       : MacroAssembler(as->code()), _squaring(squaring) {
5058 
5059       // Register allocation
5060 
5061       RegSetIterator regs = (RegSet::range(r0, r26) - r18_tls).begin();
5062       Pa_base = *regs;       // Argument registers
5063       if (squaring)
5064         Pb_base = Pa_base;
5065       else
5066         Pb_base = *++regs;
5067       Pn_base = *++regs;
5068       Rlen= *++regs;
5069       inv = *++regs;
5070       Pm_base = *++regs;
5071 
5072                           // Working registers:
5073       Ra =  *++regs;        // The current digit of a, b, n, and m.
5074       Rb =  *++regs;
5075       Rm =  *++regs;
5076       Rn =  *++regs;
5077 
5078       Pa =  *++regs;        // Pointers to the current/next digit of a, b, n, and m.
5079       Pb =  *++regs;
5080       Pm =  *++regs;
5081       Pn =  *++regs;
5082 
5083       t0 =  *++regs;        // Three registers which form a
5084       t1 =  *++regs;        // triple-precision accumuator.
5085       t2 =  *++regs;
5086 
5087       Ri =  *++regs;        // Inner and outer loop indexes.
5088       Rj =  *++regs;
5089 
5090       Rhi_ab = *++regs;     // Product registers: low and high parts
5091       Rlo_ab = *++regs;     // of a*b and m*n.
5092       Rhi_mn = *++regs;
5093       Rlo_mn = *++regs;
5094 
5095       // r19 and up are callee-saved.
5096       _toSave = RegSet::range(r19, *regs) + Pm_base;
5097     }
5098 
5099   private:
5100     void save_regs() {
5101       push(_toSave, sp);
5102     }
5103 
5104     void restore_regs() {
5105       pop(_toSave, sp);
5106     }
5107 
5108     template <typename T>
5109     void unroll_2(Register count, T block) {
5110       Label loop, end, odd;
5111       tbnz(count, 0, odd);
5112       cbz(count, end);
5113       align(16);
5114       bind(loop);
5115       (this->*block)();
5116       bind(odd);


< prev index next >