< prev index next >

src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

Print this page




1154       __ cmpptr(to, end_from);
1155       __ jump_cc(Assembler::aboveEqual, no_overlap);
1156     } else {
1157       __ jcc(Assembler::belowEqual, (*NOLp));
1158       __ cmpptr(to, end_from);
1159       __ jcc(Assembler::aboveEqual, (*NOLp));
1160     }
1161   }
1162 
1163   // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1164   //
1165   // Outputs:
1166   //    rdi - rcx
1167   //    rsi - rdx
1168   //    rdx - r8
1169   //    rcx - r9
1170   //
1171   // On Windows, registers r9 and r10 are used to save rdi and rsi, which
1172   // are non-volatile there.  r9 and r10 should not be used by the caller.
1173   //


1174   void setup_arg_regs(int nargs = 3) {
         // Move the first 'nargs' (3 or 4) incoming C arguments into the
         // Linux/Solaris argument registers rdi, rsi, rdx (and rcx when
         // nargs == 4).  On Windows, rdi and rsi are callee-saved, so they
         // are stashed in r9/r10 for restore_arg_regs() to put back.
1175     const Register saved_rdi = r9;
1176     const Register saved_rsi = r10;
1177     assert(nargs == 3 || nargs == 4, "else fix");
1178 #ifdef _WIN64
1179     assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1180            "unexpected argument registers");
         // c_rarg3 (r9) doubles as saved_rdi below, so park it in rax
         // before r9 is clobbered by the save of rdi.
1181     if (nargs >= 4)
1182       __ mov(rax, r9);  // r9 is also saved_rdi
1183     __ movptr(saved_rdi, rdi);  // preserve callee-saved rdi in r9
1184     __ movptr(saved_rsi, rsi);  // preserve callee-saved rsi in r10
1185     __ mov(rdi, rcx); // c_rarg0
1186     __ mov(rsi, rdx); // c_rarg1
1187     __ mov(rdx, r8);  // c_rarg2
1188     if (nargs >= 4)
1189       __ mov(rcx, rax); // c_rarg3 (via rax)
1190 #else
         // The Unix calling convention already matches the expected layout;
         // nothing to move, just sanity-check the register assignments.
1191     assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1192            "unexpected argument registers");
1193 #endif

1194   }
1195 
1196   void restore_arg_regs() {
         // Undo setup_arg_regs(): on Windows, reload the callee-saved rdi
         // and rsi from their save registers r9/r10.  A no-op elsewhere.

1197     const Register saved_rdi = r9;
1198     const Register saved_rsi = r10;
1199 #ifdef _WIN64
1200     __ movptr(rdi, saved_rdi);
1201     __ movptr(rsi, saved_rsi);
1202 #endif
1203   }
1204 
































1205 
1206   // Copy big chunks forward
1207   //
1208   // Inputs:
1209   //   end_from     - source array's end address
1210   //   end_to       - destination array end address
1211   //   qword_count  - 64-bits element count, negative
1212   //   to           - scratch
1213   //   L_copy_bytes - entry label
1214   //   L_copy_8_bytes  - exit  label
1215   //
1216   void copy_bytes_forward(Register end_from, Register end_to,
1217                              Register qword_count, Register to,
1218                              Label& L_copy_bytes, Label& L_copy_8_bytes) {
1219     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1220     Label L_loop;
1221     __ align(OptoLoopAlignment);
1222     if (UseUnalignedLoadStores) {
1223       Label L_end;
1224       if (UseAVX > 2) {


1812     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1813     const Register from        = rdi;  // source array address
1814     const Register to          = rsi;  // destination array address
1815     const Register count       = rdx;  // elements count
1816     const Register dword_count = rcx;
1817     const Register qword_count = count;
1818     const Register end_from    = from; // source array end address
1819     const Register end_to      = to;   // destination array end address
1820     // End pointers are inclusive, and if count is not zero they point
1821     // to the last unit copied:  end_to[0] := end_from[0]
1822 
1823     __ enter(); // required for proper stackwalking of RuntimeStub frame
1824     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1825 
1826     if (entry != NULL) {
1827       *entry = __ pc();
1828       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1829       BLOCK_COMMENT("Entry:");
1830     }
1831 
1832     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1833                       // r9 and r10 may be used to save non-volatile registers
1834 
1835     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1836     if (dest_uninitialized) {
1837       decorators |= IS_DEST_UNINITIALIZED;
1838     }
1839     if (aligned) {
1840       decorators |= ARRAYCOPY_ALIGNED;
1841     }
1842 
1843     BasicType type = is_oop ? T_OBJECT : T_INT;
1844     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1845     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1846 
1847     // 'from', 'to' and 'count' are now valid
1848     __ movptr(dword_count, count);
1849     __ shrptr(count, 1); // count => qword_count
1850 
1851     // Copy from low to high addresses.  Use 'to' as scratch.
1852     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1853     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1854     __ negptr(qword_count);
1855     __ jmp(L_copy_bytes);
1856 
1857     // Copy trailing qwords
1858   __ BIND(L_copy_8_bytes);
1859     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1860     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1861     __ increment(qword_count);
1862     __ jcc(Assembler::notZero, L_copy_8_bytes);
1863 
1864     // Check for and copy trailing dword
1865   __ BIND(L_copy_4_bytes);
1866     __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1867     __ jccb(Assembler::zero, L_exit);
1868     __ movl(rax, Address(end_from, 8));
1869     __ movl(Address(end_to, 8), rax);
1870 
1871   __ BIND(L_exit);
1872     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1873     restore_arg_regs();
1874     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1875     __ vzeroupper();
1876     __ xorptr(rax, rax); // return 0
1877     __ leave(); // required for proper stackwalking of RuntimeStub frame
1878     __ ret(0);
1879 
1880     // Copy in multi-bytes chunks
1881     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1882     __ jmp(L_copy_4_bytes);
1883 
1884     return start;
1885   }
1886 
1887   // Arguments:
1888   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1889   //             ignored
1890   //   is_oop  - true => oop array, so generate store check code
1891   //   name    - stub name string
1892   //
1893   // Inputs:


1906     StubCodeMark mark(this, "StubRoutines", name);
1907     address start = __ pc();
1908 
1909     Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1910     const Register from        = rdi;  // source array address
1911     const Register to          = rsi;  // destination array address
1912     const Register count       = rdx;  // elements count
1913     const Register dword_count = rcx;
1914     const Register qword_count = count;
1915 
1916     __ enter(); // required for proper stackwalking of RuntimeStub frame
1917     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1918 
1919     if (entry != NULL) {
1920       *entry = __ pc();
1921        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1922       BLOCK_COMMENT("Entry:");
1923     }
1924 
1925     array_overlap_test(nooverlap_target, Address::times_4);
1926     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1927                       // r9 and r10 may be used to save non-volatile registers
1928 
1929     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1930     if (dest_uninitialized) {
1931       decorators |= IS_DEST_UNINITIALIZED;
1932     }
1933     if (aligned) {
1934       decorators |= ARRAYCOPY_ALIGNED;
1935     }
1936 
1937     BasicType type = is_oop ? T_OBJECT : T_INT;
1938     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1939     // no registers are destroyed by this call
1940     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1941 
1942     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1943     // 'from', 'to' and 'count' are now valid
1944     __ movptr(dword_count, count);
1945     __ shrptr(count, 1); // count => qword_count
1946 
1947     // Copy from high to low addresses.  Use 'to' as scratch.
1948 
1949     // Check for and copy trailing dword
1950     __ testl(dword_count, 1);
1951     __ jcc(Assembler::zero, L_copy_bytes);
1952     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1953     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1954     __ jmp(L_copy_bytes);
1955 
1956     // Copy trailing qwords
1957   __ BIND(L_copy_8_bytes);
1958     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1959     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1960     __ decrement(qword_count);
1961     __ jcc(Assembler::notZero, L_copy_8_bytes);
1962 
1963     if (is_oop) {
1964       __ jmp(L_exit);
1965     }
1966     restore_arg_regs();
1967     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1968     __ xorptr(rax, rax); // return 0
1969     __ vzeroupper();
1970     __ leave(); // required for proper stackwalking of RuntimeStub frame
1971     __ ret(0);
1972 
1973     // Copy in multi-bytes chunks
1974     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1975 
1976   __ BIND(L_exit);
1977     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1978     restore_arg_regs();
1979     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1980     __ xorptr(rax, rax); // return 0
1981     __ vzeroupper();
1982     __ leave(); // required for proper stackwalking of RuntimeStub frame
1983     __ ret(0);
1984 
1985     return start;
1986   }
1987 
1988   // Arguments:
1989   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1990   //             ignored
1991   //   is_oop  - true => oop array, so generate store check code
1992   //   name    - stub name string
1993   //
1994   // Inputs:
1995   //   c_rarg0   - source array address
1996   //   c_rarg1   - destination array address
1997   //   c_rarg2   - element count, treated as ssize_t, can be zero
1998   //


2009     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2010     const Register from        = rdi;  // source array address
2011     const Register to          = rsi;  // destination array address
2012     const Register qword_count = rdx;  // elements count
2013     const Register end_from    = from; // source array end address
2014     const Register end_to      = rcx;  // destination array end address
2015     const Register saved_count = r11;
2016     // End pointers are inclusive, and if count is not zero they point
2017     // to the last unit copied:  end_to[0] := end_from[0]
2018 
2019     __ enter(); // required for proper stackwalking of RuntimeStub frame
2020     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2021     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2022 
2023     if (entry != NULL) {
2024       *entry = __ pc();
2025       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2026       BLOCK_COMMENT("Entry:");
2027     }
2028 
2029     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2030                       // r9 and r10 may be used to save non-volatile registers
2031     // 'from', 'to' and 'qword_count' are now valid
2032 
2033     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2034     if (dest_uninitialized) {
2035       decorators |= IS_DEST_UNINITIALIZED;
2036     }
2037     if (aligned) {
2038       decorators |= ARRAYCOPY_ALIGNED;
2039     }
2040 
2041     BasicType type = is_oop ? T_OBJECT : T_LONG;
2042     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2043     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2044 
2045     // Copy from low to high addresses.  Use 'to' as scratch.
2046     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2047     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
2048     __ negptr(qword_count);
2049     __ jmp(L_copy_bytes);
2050 
2051     // Copy trailing qwords
2052   __ BIND(L_copy_8_bytes);
2053     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2054     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2055     __ increment(qword_count);
2056     __ jcc(Assembler::notZero, L_copy_8_bytes);
2057 
2058     if (is_oop) {
2059       __ jmp(L_exit);
2060     } else {
2061       restore_arg_regs();
2062       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2063       __ xorptr(rax, rax); // return 0
2064       __ vzeroupper();
2065       __ leave(); // required for proper stackwalking of RuntimeStub frame
2066       __ ret(0);
2067     }
2068 
2069     // Copy in multi-bytes chunks
2070     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2071 
2072     __ BIND(L_exit);
2073     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2074     restore_arg_regs();
2075     if (is_oop) {
2076       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2077     } else {
2078       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2079     }
2080     __ vzeroupper();
2081     __ xorptr(rax, rax); // return 0
2082     __ leave(); // required for proper stackwalking of RuntimeStub frame
2083     __ ret(0);
2084 
2085     return start;
2086   }
2087 
2088   // Arguments:
2089   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2090   //             ignored
2091   //   is_oop  - true => oop array, so generate store check code
2092   //   name    - stub name string
2093   //
2094   // Inputs:


2102     __ align(CodeEntryAlignment);
2103     StubCodeMark mark(this, "StubRoutines", name);
2104     address start = __ pc();
2105 
2106     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2107     const Register from        = rdi;  // source array address
2108     const Register to          = rsi;  // destination array address
2109     const Register qword_count = rdx;  // elements count
2110     const Register saved_count = rcx;
2111 
2112     __ enter(); // required for proper stackwalking of RuntimeStub frame
2113     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2114 
2115     if (entry != NULL) {
2116       *entry = __ pc();
2117       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2118       BLOCK_COMMENT("Entry:");
2119     }
2120 
2121     array_overlap_test(nooverlap_target, Address::times_8);
2122     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2123                       // r9 and r10 may be used to save non-volatile registers
2124     // 'from', 'to' and 'qword_count' are now valid
2125 
2126     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2127     if (dest_uninitialized) {
2128       decorators |= IS_DEST_UNINITIALIZED;
2129     }
2130     if (aligned) {
2131       decorators |= ARRAYCOPY_ALIGNED;
2132     }
2133 
2134     BasicType type = is_oop ? T_OBJECT : T_LONG;
2135     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2136     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2137 
2138     __ jmp(L_copy_bytes);
2139 
2140     // Copy trailing qwords
2141   __ BIND(L_copy_8_bytes);
2142     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2143     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2144     __ decrement(qword_count);
2145     __ jcc(Assembler::notZero, L_copy_8_bytes);
2146 
2147     if (is_oop) {
2148       __ jmp(L_exit);
2149     } else {
2150       restore_arg_regs();
2151       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2152       __ xorptr(rax, rax); // return 0
2153       __ vzeroupper();
2154       __ leave(); // required for proper stackwalking of RuntimeStub frame
2155       __ ret(0);
2156     }
2157 
2158     // Copy in multi-bytes chunks
2159     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2160 
2161     __ BIND(L_exit);
2162     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2163     restore_arg_regs();
2164     if (is_oop) {
2165       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2166     } else {
2167       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2168     }
2169     __ vzeroupper();
2170     __ xorptr(rax, rax); // return 0
2171     __ leave(); // required for proper stackwalking of RuntimeStub frame
2172     __ ret(0);
2173 
2174     return start;
2175   }
2176 
2177 
2178   // Helper for generating a dynamic type check.
2179   // Smashes no registers.
2180   void generate_type_check(Register sub_klass,
2181                            Register super_check_offset,
2182                            Register super_klass,
2183                            Label& L_success) {


2259 #endif //ASSERT
2260 
2261     setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
2262                        // ckoff => rcx, ckval => r8
2263                        // r9 and r10 may be used to save non-volatile registers
2264 #ifdef _WIN64
2265     // last argument (#4) is on stack on Win64
2266     __ movptr(ckval, Address(rsp, 6 * wordSize));
2267 #endif
2268 
2269     // Caller of this entry point must set up the argument registers.
2270     if (entry != NULL) {
2271       *entry = __ pc();
2272       BLOCK_COMMENT("Entry:");
2273     }
2274 
2275     // allocate spill slots for r13, r14
2276     enum {
2277       saved_r13_offset,
2278       saved_r14_offset,

2279       saved_rbp_offset
2280     };
2281     __ subptr(rsp, saved_rbp_offset * wordSize);
2282     __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
2283     __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);










2284 
2285     // check that int operands are properly extended to size_t
2286     assert_clean_int(length, rax);
2287     assert_clean_int(ckoff, rax);
2288 
2289 #ifdef ASSERT
2290     BLOCK_COMMENT("assert consistent ckoff/ckval");
2291     // The ckoff and ckval must be mutually consistent,
2292     // even though caller generates both.
2293     { Label L;
2294       int sco_offset = in_bytes(Klass::super_check_offset_offset());
2295       __ cmpl(ckoff, Address(ckval, sco_offset));
2296       __ jcc(Assembler::equal, L);
2297       __ stop("super_check_offset inconsistent");
2298       __ bind(L);
2299     }
2300 #endif //ASSERT
2301 
2302     // Loop-invariant addresses.  They are exclusive end pointers.
2303     Address end_from_addr(from, length, TIMES_OOP, 0);


2355     // and report their number to the caller.
2356     assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2357     Label L_post_barrier;
2358     __ addptr(r14_length, count);     // K = (original - remaining) oops
2359     __ movptr(rax, r14_length);       // save the value
2360     __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
2361     __ jccb(Assembler::notZero, L_post_barrier);
2362     __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2363 
2364     // Come here on success only.
2365     __ BIND(L_do_card_marks);
2366     __ xorptr(rax, rax);              // return 0 on success
2367 
2368     __ BIND(L_post_barrier);
2369     bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2370 
2371     // Common exit point (success or failure).
2372     __ BIND(L_done);
2373     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2374     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));

2375     restore_arg_regs();
2376     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2377     __ leave(); // required for proper stackwalking of RuntimeStub frame
2378     __ ret(0);
2379 
2380     return start;
2381   }
2382 
2383   //
2384   //  Generate 'unsafe' array copy stub
2385   //  Though just as safe as the other stubs, it takes an unscaled
2386   //  size_t argument instead of an element count.
2387   //
2388   //  Input:
2389   //    c_rarg0   - source array address
2390   //    c_rarg1   - destination array address
2391   //    c_rarg2   - byte count, treated as ssize_t, can be zero
2392   //
2393   // Examines the alignment of the operands and dispatches
2394   // to a long, int, short, or byte copy loop.




1154       __ cmpptr(to, end_from);
1155       __ jump_cc(Assembler::aboveEqual, no_overlap);
1156     } else {
1157       __ jcc(Assembler::belowEqual, (*NOLp));
1158       __ cmpptr(to, end_from);
1159       __ jcc(Assembler::aboveEqual, (*NOLp));
1160     }
1161   }
1162 
1163   // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1164   //
1165   // Outputs:
1166   //    rdi - rcx
1167   //    rsi - rdx
1168   //    rdx - r8
1169   //    rcx - r9
1170   //
1171   // On Windows, registers r9 and r10 are used to save rdi and rsi, which
1172   // are non-volatile there.  r9 and r10 should not be used by the caller.
1173   //
1174   DEBUG_ONLY(bool regs_in_thread;)
1175 
1176   void setup_arg_regs(int nargs = 3) {
         // Move the first 'nargs' (3 or 4) incoming C arguments into the
         // Linux/Solaris argument registers rdi, rsi, rdx (and rcx when
         // nargs == 4).  On Windows, rdi and rsi are callee-saved, so they
         // are stashed in r9/r10 for restore_arg_regs() to put back.
1177     const Register saved_rdi = r9;
1178     const Register saved_rsi = r10;
1179     assert(nargs == 3 || nargs == 4, "else fix");
1180 #ifdef _WIN64
1181     assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1182            "unexpected argument registers");
         // c_rarg3 (r9) doubles as saved_rdi below, so park it in rax
         // before r9 is clobbered by the save of rdi.
1183     if (nargs >= 4)
1184       __ mov(rax, r9);  // r9 is also saved_rdi
1185     __ movptr(saved_rdi, rdi);  // preserve callee-saved rdi in r9
1186     __ movptr(saved_rsi, rsi);  // preserve callee-saved rsi in r10
1187     __ mov(rdi, rcx); // c_rarg0
1188     __ mov(rsi, rdx); // c_rarg1
1189     __ mov(rdx, r8);  // c_rarg2
1190     if (nargs >= 4)
1191       __ mov(rcx, rax); // c_rarg3 (via rax)
1192 #else
         // The Unix calling convention already matches the expected layout;
         // nothing to move, just sanity-check the register assignments.
1193     assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1194            "unexpected argument registers");
1195 #endif
         // Debug bookkeeping: the matching teardown must be restore_arg_regs(),
         // not restore_arg_regs_using_thread().
1196     DEBUG_ONLY(regs_in_thread = false;)
1197   }
1198 
1199   void restore_arg_regs() {
         // Undo setup_arg_regs(): on Windows, reload the callee-saved rdi
         // and rsi from their save registers r9/r10.  A no-op elsewhere.
         // Must pair with setup_arg_regs(), which clears regs_in_thread.
1200     assert(!regs_in_thread, "wrong call to restore_arg_regs");
1201     const Register saved_rdi = r9;
1202     const Register saved_rsi = r10;
1203 #ifdef _WIN64
1204     __ movptr(rdi, saved_rdi);
1205     __ movptr(rsi, saved_rsi);
1206 #endif
1207   }
1208 
1209   // This is used in places where r10 is a scratch register, and can
1210   // be adapted if r9 is needed also.
1211   void setup_arg_regs_using_thread() {
         // Variant of setup_arg_regs() for stubs that need r10 as a scratch
         // register: on Windows the callee-saved rdi/rsi are spilled into
         // JavaThread fields instead of r9/r10, leaving only r9 occupied
         // (it holds the caller's r15, which get_thread() clobbers).
1212     const Register saved_r15 = r9;
1213 #ifdef _WIN64
1214     __ mov(saved_r15, r15);  // r15 is callee saved and needs to be restored
1215     __ get_thread(r15_thread);
1216     assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1217            "unexpected argument registers");
         // Spill rdi/rsi into the current JavaThread rather than registers.
1218     __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())), rdi);
1219     __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())), rsi);
1220
         // Shuffle the Windows argument registers into the Unix layout.
1221     __ mov(rdi, rcx); // c_rarg0
1222     __ mov(rsi, rdx); // c_rarg1
1223     __ mov(rdx, r8);  // c_rarg2
1224 #else
         // The Unix calling convention already matches the expected layout;
         // nothing to move, just sanity-check the register assignments.
1225     assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1226            "unexpected argument registers");
1227 #endif
         // Debug bookkeeping: the matching teardown must be
         // restore_arg_regs_using_thread().
1228     DEBUG_ONLY(regs_in_thread = true;)
1229   }
1230 
1231   void restore_arg_regs_using_thread() {
         // Undo setup_arg_regs_using_thread(): on Windows, reload rdi/rsi
         // from their JavaThread spill slots and restore the caller's r15
         // from r9.  A no-op elsewhere.
1232     assert(regs_in_thread, "wrong call to restore_arg_regs");
1233     const Register saved_r15 = r9;
1234 #ifdef _WIN64
         // r15 currently holds a stale thread pointer from setup; re-fetch
         // the current thread to address the spill slots.
1235     __ get_thread(r15_thread);
1236     __ movptr(rsi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())));
1237     __ movptr(rdi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())));
1238     __ mov(r15, saved_r15);  // r15 is callee saved and needs to be restored
1239 #endif
1240   }
1241 
1242   // Copy big chunks forward
1243   //
1244   // Inputs:
1245   //   end_from     - source array's end address
1246   //   end_to       - destination array end address
1247   //   qword_count  - 64-bits element count, negative
1248   //   to           - scratch
1249   //   L_copy_bytes - entry label
1250   //   L_copy_8_bytes  - exit  label
1251   //
1252   void copy_bytes_forward(Register end_from, Register end_to,
1253                              Register qword_count, Register to,
1254                              Label& L_copy_bytes, Label& L_copy_8_bytes) {
1255     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1256     Label L_loop;
1257     __ align(OptoLoopAlignment);
1258     if (UseUnalignedLoadStores) {
1259       Label L_end;
1260       if (UseAVX > 2) {


1848     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1849     const Register from        = rdi;  // source array address
1850     const Register to          = rsi;  // destination array address
1851     const Register count       = rdx;  // elements count
1852     const Register dword_count = rcx;
1853     const Register qword_count = count;
1854     const Register end_from    = from; // source array end address
1855     const Register end_to      = to;   // destination array end address
1856     // End pointers are inclusive, and if count is not zero they point
1857     // to the last unit copied:  end_to[0] := end_from[0]
1858 
1859     __ enter(); // required for proper stackwalking of RuntimeStub frame
1860     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1861 
1862     if (entry != NULL) {
1863       *entry = __ pc();
1864       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1865       BLOCK_COMMENT("Entry:");
1866     }
1867 
1868     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1869                                    // r9 is used to save r15_thread
1870 
1871     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1872     if (dest_uninitialized) {
1873       decorators |= IS_DEST_UNINITIALIZED;
1874     }
1875     if (aligned) {
1876       decorators |= ARRAYCOPY_ALIGNED;
1877     }
1878 
1879     BasicType type = is_oop ? T_OBJECT : T_INT;
1880     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1881     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1882 
1883     // 'from', 'to' and 'count' are now valid
1884     __ movptr(dword_count, count);
1885     __ shrptr(count, 1); // count => qword_count
1886 
1887     // Copy from low to high addresses.  Use 'to' as scratch.
1888     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1889     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
1890     __ negptr(qword_count);
1891     __ jmp(L_copy_bytes);
1892 
1893     // Copy trailing qwords
1894   __ BIND(L_copy_8_bytes);
1895     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1896     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1897     __ increment(qword_count);
1898     __ jcc(Assembler::notZero, L_copy_8_bytes);
1899 
1900     // Check for and copy trailing dword
1901   __ BIND(L_copy_4_bytes);
1902     __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1903     __ jccb(Assembler::zero, L_exit);
1904     __ movl(rax, Address(end_from, 8));
1905     __ movl(Address(end_to, 8), rax);
1906 
1907   __ BIND(L_exit);
1908     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1909     restore_arg_regs_using_thread();
1910     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1911     __ vzeroupper();
1912     __ xorptr(rax, rax); // return 0
1913     __ leave(); // required for proper stackwalking of RuntimeStub frame
1914     __ ret(0);
1915 
1916     // Copy in multi-bytes chunks
1917     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1918     __ jmp(L_copy_4_bytes);
1919 
1920     return start;
1921   }
1922 
1923   // Arguments:
1924   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1925   //             ignored
1926   //   is_oop  - true => oop array, so generate store check code
1927   //   name    - stub name string
1928   //
1929   // Inputs:


1942     StubCodeMark mark(this, "StubRoutines", name);
1943     address start = __ pc();
1944 
1945     Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1946     const Register from        = rdi;  // source array address
1947     const Register to          = rsi;  // destination array address
1948     const Register count       = rdx;  // elements count
1949     const Register dword_count = rcx;
1950     const Register qword_count = count;
1951 
1952     __ enter(); // required for proper stackwalking of RuntimeStub frame
1953     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
1954 
1955     if (entry != NULL) {
1956       *entry = __ pc();
1957        // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1958       BLOCK_COMMENT("Entry:");
1959     }
1960 
1961     array_overlap_test(nooverlap_target, Address::times_4);
1962     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1963                                    // r9 is used to save r15_thread
1964 
1965     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1966     if (dest_uninitialized) {
1967       decorators |= IS_DEST_UNINITIALIZED;
1968     }
1969     if (aligned) {
1970       decorators |= ARRAYCOPY_ALIGNED;
1971     }
1972 
1973     BasicType type = is_oop ? T_OBJECT : T_INT;
1974     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1975     // no registers are destroyed by this call
1976     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1977 
1978     assert_clean_int(count, rax); // Make sure 'count' is clean int.
1979     // 'from', 'to' and 'count' are now valid
1980     __ movptr(dword_count, count);
1981     __ shrptr(count, 1); // count => qword_count
1982 
1983     // Copy from high to low addresses.  Use 'to' as scratch.
1984 
1985     // Check for and copy trailing dword
1986     __ testl(dword_count, 1);
1987     __ jcc(Assembler::zero, L_copy_bytes);
1988     __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1989     __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1990     __ jmp(L_copy_bytes);
1991 
1992     // Copy trailing qwords
1993   __ BIND(L_copy_8_bytes);
1994     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1995     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1996     __ decrement(qword_count);
1997     __ jcc(Assembler::notZero, L_copy_8_bytes);
1998 
1999     if (is_oop) {
2000       __ jmp(L_exit);
2001     }
2002     restore_arg_regs_using_thread();
2003     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2004     __ xorptr(rax, rax); // return 0
2005     __ vzeroupper();
2006     __ leave(); // required for proper stackwalking of RuntimeStub frame
2007     __ ret(0);
2008 
2009     // Copy in multi-bytes chunks
2010     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2011 
2012   __ BIND(L_exit);
2013     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2014     restore_arg_regs_using_thread();
2015     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2016     __ xorptr(rax, rax); // return 0
2017     __ vzeroupper();
2018     __ leave(); // required for proper stackwalking of RuntimeStub frame
2019     __ ret(0);
2020 
2021     return start;
2022   }
2023 
2024   // Arguments:
2025   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2026   //             (this alignment hint is currently ignored)
2027   //   is_oop  - true => oop array, so generate store check code
2028   //   name    - stub name string
2029   //
2030   // Inputs:
2031   //   c_rarg0   - source array address
2032   //   c_rarg1   - destination array address
2033   //   c_rarg2   - element count, treated as ssize_t, can be zero
2034   //


2045     Label L_copy_bytes, L_copy_8_bytes, L_exit;
         // Body of the disjoint long/oop copy stub (the signature is elided in
         // this view; the reference below to generate_conjoint_long_oop_copy()
         // identifies it).  Copies qword_count 8-byte elements forward
         // (low -> high addresses), with GC barrier hooks around the copy.
2046     const Register from        = rdi;  // source array address
2047     const Register to          = rsi;  // destination array address
2048     const Register qword_count = rdx;  // elements count
2049     const Register end_from    = from; // source array end address
2050     const Register end_to      = rcx;  // destination array end address
2051     const Register saved_count = r11;
2052     // End pointers are inclusive, and if count is not zero they point
2053     // to the last unit copied:  end_to[0] := end_from[0]
2054 
2055     __ enter(); // required for proper stackwalking of RuntimeStub frame
2056     // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2057     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2058 
         // Publish a secondary entry point, past the frame setup, for callers
         // that have already placed the arguments in the c_rarg registers.
2059     if (entry != NULL) {
2060       *entry = __ pc();
2061       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2062       BLOCK_COMMENT("Entry:");
2063     }
2064 
2065     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2066                                      // r9 is used to save r15_thread
2067     // 'from', 'to' and 'qword_count' are now valid
2068 
         // Describe this copy to the GC barrier set: an in-heap array copy of
         // non-overlapping ranges, optionally into uninitialized memory and/or
         // with aligned operands.
2069     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2070     if (dest_uninitialized) {
2071       decorators |= IS_DEST_UNINITIALIZED;
2072     }
2073     if (aligned) {
2074       decorators |= ARRAYCOPY_ALIGNED;
2075     }
2076 
2077     BasicType type = is_oop ? T_OBJECT : T_LONG;
2078     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2079     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2080 
2081     // Copy from low to high addresses.  Use 'to' as scratch.
         // end_from/end_to are made to point at the last qword and qword_count
         // is negated, so the loops below address end_ptr + 8*(qword_count+1)
         // while qword_count climbs from -count toward zero; reaching zero
         // terminates the copy.
2082     __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2083     __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
2084     __ negptr(qword_count);
2085     __ jmp(L_copy_bytes);
2086 
2087     // Copy trailing qwords
         // Remainder loop for the elements left over after the bulk copy
         // (copy_bytes_forward jumps back here with fewer than a chunk left).
2088   __ BIND(L_copy_8_bytes);
2089     __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2090     __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2091     __ increment(qword_count);
2092     __ jcc(Assembler::notZero, L_copy_8_bytes);
2093 
         // Oop copies must still run the barrier epilogue at L_exit; plain
         // long copies can restore registers and return directly.
2094     if (is_oop) {
2095       __ jmp(L_exit);
2096     } else {
2097       restore_arg_regs_using_thread();
2098       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2099       __ xorptr(rax, rax); // return 0
2100       __ vzeroupper();
2101       __ leave(); // required for proper stackwalking of RuntimeStub frame
2102       __ ret(0);
2103     }
2104 
2105     // Copy in multi-bytes chunks
2106     copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2107 
         // Common exit for the oop path: run the GC epilogue barrier, bump the
         // appropriate stats counter, and return 0 (success).
2108     __ BIND(L_exit);
2109     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2110     restore_arg_regs_using_thread();
2111     if (is_oop) {
2112       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2113     } else {
2114       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2115     }
2116     __ vzeroupper();
2117     __ xorptr(rax, rax); // return 0
2118     __ leave(); // required for proper stackwalking of RuntimeStub frame
2119     __ ret(0);
2120 
2121     return start;
2122   }
2123 
2124   // Arguments:
2125   //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2126   //             (this alignment hint is currently ignored)
2127   //   is_oop  - true => oop array, so generate store check code
2128   //   name    - stub name string
2129   //
2130   // Inputs:


2138     __ align(CodeEntryAlignment);
2139     StubCodeMark mark(this, "StubRoutines", name);
2140     address start = __ pc();
2141 
         // Body of the conjoint (possibly-overlapping) long/oop copy stub:
         // copies qword_count 8-byte elements backward (high -> low) so that
         // overlapping source/destination ranges are handled correctly.
2142     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2143     const Register from        = rdi;  // source array address
2144     const Register to          = rsi;  // destination array address
2145     const Register qword_count = rdx;  // elements count
2146     const Register saved_count = rcx;
2147 
2148     __ enter(); // required for proper stackwalking of RuntimeStub frame
2149     assert_clean_int(c_rarg2, rax);    // Make sure 'count' is clean int.
2150 
         // Publish a secondary entry point, past the frame setup, for callers
         // that have already placed the arguments in the c_rarg registers.
2151     if (entry != NULL) {
2152       *entry = __ pc();
2153       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2154       BLOCK_COMMENT("Entry:");
2155     }
2156 
         // If the ranges do not overlap, tail-jump to the disjoint (forward)
         // stub; otherwise fall through and copy backward.
2157     array_overlap_test(nooverlap_target, Address::times_8);
2158     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2159                                    // r9 is used to save r15_thread
2160     // 'from', 'to' and 'qword_count' are now valid
2161 
         // Describe this copy to the GC barrier set.  Unlike the disjoint
         // stub, ARRAYCOPY_DISJOINT must NOT be set here: this path is only
         // reached when the ranges may overlap, and a barrier implementation
         // could otherwise legally assume non-overlapping operands.
2162     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
2163     if (dest_uninitialized) {
2164       decorators |= IS_DEST_UNINITIALIZED;
2165     }
2166     if (aligned) {
2167       decorators |= ARRAYCOPY_ALIGNED;
2168     }
2169 
2170     BasicType type = is_oop ? T_OBJECT : T_LONG;
2171     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2172     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2173 
2174     __ jmp(L_copy_bytes);
2175 
2176     // Copy trailing qwords
         // Remainder loop (entered from copy_bytes_backward with fewer than a
         // chunk left): each iteration copies element qword_count-1, counting
         // down until qword_count reaches zero.
2177   __ BIND(L_copy_8_bytes);
2178     __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2179     __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2180     __ decrement(qword_count);
2181     __ jcc(Assembler::notZero, L_copy_8_bytes);
2182 
         // Oop copies must still run the barrier epilogue at L_exit; plain
         // long copies can restore registers and return directly.
2183     if (is_oop) {
2184       __ jmp(L_exit);
2185     } else {
2186       restore_arg_regs_using_thread();
2187       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2188       __ xorptr(rax, rax); // return 0
2189       __ vzeroupper();
2190       __ leave(); // required for proper stackwalking of RuntimeStub frame
2191       __ ret(0);
2192     }
2193 
2194     // Copy in multi-bytes chunks
2195     copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2196 
         // Common exit for the oop path: run the GC epilogue barrier, bump the
         // appropriate stats counter, and return 0 (success).
2197     __ BIND(L_exit);
2198     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2199     restore_arg_regs_using_thread();
2200     if (is_oop) {
2201       inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2202     } else {
2203       inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2204     }
2205     __ vzeroupper();
2206     __ xorptr(rax, rax); // return 0
2207     __ leave(); // required for proper stackwalking of RuntimeStub frame
2208     __ ret(0);
2209 
2210     return start;
2211   }
2212 
2213 
2214   // Helper for generating a dynamic type check.
2215   // Smashes no registers.
2216   void generate_type_check(Register sub_klass,
2217                            Register super_check_offset,
2218                            Register super_klass,
2219                            Label& L_success) {


2295 #endif //ASSERT
2296 
2297     setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
2298                        // ckoff => rcx, ckval => r8
2299                        // r9 and r10 may be used to save non-volatile registers
2300 #ifdef _WIN64
2301     // last argument (#4) is on stack on Win64
2302     __ movptr(ckval, Address(rsp, 6 * wordSize));
2303 #endif
2304 
2305     // Caller of this entry point must set up the argument registers.
2306     if (entry != NULL) {
2307       *entry = __ pc();
2308       BLOCK_COMMENT("Entry:");
2309     }
2310 
2311     // allocate spill slots for r13, r14
2312     enum {
2313       saved_r13_offset,
2314       saved_r14_offset,
2315       saved_r10_offset,
2316       saved_rbp_offset
2317     };
2318     __ subptr(rsp, saved_rbp_offset * wordSize);
2319     __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
2320     __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
2321     __ movptr(Address(rsp, saved_r10_offset * wordSize), r10);
2322 
2323 #ifdef ASSERT
2324       Label L2;
2325       __ get_thread(r14);
2326       __ cmpptr(r15_thread, r14);
2327       __ jcc(Assembler::equal, L2);
2328       __ stop("StubRoutines::call_stub: r15_thread is modified by call");
2329       __ bind(L2);
2330 #endif // ASSERT
2331 
2332     // check that int operands are properly extended to size_t
2333     assert_clean_int(length, rax);
2334     assert_clean_int(ckoff, rax);
2335 
2336 #ifdef ASSERT
2337     BLOCK_COMMENT("assert consistent ckoff/ckval");
2338     // The ckoff and ckval must be mutually consistent,
2339     // even though caller generates both.
2340     { Label L;
2341       int sco_offset = in_bytes(Klass::super_check_offset_offset());
2342       __ cmpl(ckoff, Address(ckval, sco_offset));
2343       __ jcc(Assembler::equal, L);
2344       __ stop("super_check_offset inconsistent");
2345       __ bind(L);
2346     }
2347 #endif //ASSERT
2348 
2349     // Loop-invariant addresses.  They are exclusive end pointers.
2350     Address end_from_addr(from, length, TIMES_OOP, 0);


2402     // and report their number to the caller.
2403     assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2404     Label L_post_barrier;
2405     __ addptr(r14_length, count);     // K = (original - remaining) oops
2406     __ movptr(rax, r14_length);       // save the value
2407     __ notptr(rax);                   // report (-1^K) to caller (does not affect flags)
2408     __ jccb(Assembler::notZero, L_post_barrier);
2409     __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2410 
2411     // Come here on success only.
2412     __ BIND(L_do_card_marks);
2413     __ xorptr(rax, rax);              // return 0 on success
2414 
2415     __ BIND(L_post_barrier);
2416     bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2417 
2418     // Common exit point (success or failure).
2419     __ BIND(L_done);
2420     __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2421     __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2422     __ movptr(r10, Address(rsp, saved_r10_offset * wordSize));
2423     restore_arg_regs();
2424     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2425     __ leave(); // required for proper stackwalking of RuntimeStub frame
2426     __ ret(0);
2427 
2428     return start;
2429   }
2430 
2431   //
2432   //  Generate 'unsafe' array copy stub
2433   //  Though just as safe as the other stubs, it takes an unscaled
2434   //  size_t argument instead of an element count.
2435   //
2436   //  Input:
2437   //    c_rarg0   - source array address
2438   //    c_rarg1   - destination array address
2439   //    c_rarg2   - byte count, treated as ssize_t, can be zero
2440   //
2441   // Examines the alignment of the operands and dispatches
2442   // to a long, int, short, or byte copy loop.


< prev index next >