1154 __ cmpptr(to, end_from);
1155 __ jump_cc(Assembler::aboveEqual, no_overlap);
1156 } else {
1157 __ jcc(Assembler::belowEqual, (*NOLp));
1158 __ cmpptr(to, end_from);
1159 __ jcc(Assembler::aboveEqual, (*NOLp));
1160 }
1161 }
1162
1163 // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1164 //
1165 // Outputs:
1166 // rdi <- rcx (c_rarg0)
1167 // rsi <- rdx (c_rarg1)
1168 // rdx <- r8 (c_rarg2)
1169 // rcx <- r9 (c_rarg3, only when nargs == 4)
1170 //
1171 // Registers r9 and r10 are used to save rdi and rsi, which are
1172 // non-volatile on Windows. r9 and r10 should not be used by the caller.
1173 // Pair every call with restore_arg_regs() before returning.
1174 void setup_arg_regs(int nargs = 3) {
1175 const Register saved_rdi = r9;
1176 const Register saved_rsi = r10;
1177 assert(nargs == 3 || nargs == 4, "else fix");
1178 #ifdef _WIN64
1179 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1180 "unexpected argument registers");
// c_rarg3 arrives in r9, which doubles as saved_rdi below; park it in rax
// first so it survives until rcx is free to receive it.
1181 if (nargs >= 4)
1182 __ mov(rax, r9); // r9 is also saved_rdi
1183 __ movptr(saved_rdi, rdi);
1184 __ movptr(saved_rsi, rsi);
1185 __ mov(rdi, rcx); // c_rarg0
1186 __ mov(rsi, rdx); // c_rarg1
1187 __ mov(rdx, r8); // c_rarg2
1188 if (nargs >= 4)
1189 __ mov(rcx, rax); // c_rarg3 (via rax)
1190 #else
// On System V platforms the incoming argument registers already match the
// expected rdi/rsi/rdx/rcx layout, so no shuffle code is emitted.
1191 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1192 "unexpected argument registers");
1193 #endif
1194 }
1195
// Undo setup_arg_regs(): on Windows, reload the caller's non-volatile
// rdi and rsi from the r9/r10 slots where setup_arg_regs() parked them.
// A no-op on platforms whose convention already passes args in rdi/rsi.
void restore_arg_regs() {
#ifdef _WIN64
  const Register rdi_save = r9;   // same slot as saved_rdi in setup_arg_regs()
  const Register rsi_save = r10;  // same slot as saved_rsi in setup_arg_regs()
  __ movptr(rdi, rdi_save);
  __ movptr(rsi, rsi_save);
#endif
}
1204
1205
1206 // Copy big chunks forward
1207 //
1208 // Inputs:
1209 // end_from - source array's end address
1210 // end_to - destination array end address
1211 // qword_count - 64-bits element count, negative
1212 // to - scratch
1213 // L_copy_bytes - entry label
1214 // L_copy_8_bytes - exit label
1215 //
1216 void copy_bytes_forward(Register end_from, Register end_to,
1217 Register qword_count, Register to,
1218 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1219 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1220 Label L_loop;
1221 __ align(OptoLoopAlignment);
1222 if (UseUnalignedLoadStores) {
1223 Label L_end;
1224 if (UseAVX > 2) {
1812 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1813 const Register from = rdi; // source array address
1814 const Register to = rsi; // destination array address
1815 const Register count = rdx; // elements count
1816 const Register dword_count = rcx;
1817 const Register qword_count = count;
1818 const Register end_from = from; // source array end address
1819 const Register end_to = to; // destination array end address
1820 // End pointers are inclusive, and if count is not zero they point
1821 // to the last unit copied: end_to[0] := end_from[0]
1822
1823 __ enter(); // required for proper stackwalking of RuntimeStub frame
1824 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1825
1826 if (entry != NULL) {
1827 *entry = __ pc();
1828 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1829 BLOCK_COMMENT("Entry:");
1830 }
1831
1832 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1833 // r9 and r10 may be used to save non-volatile registers
1834
1835 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1836 if (dest_uninitialized) {
1837 decorators |= IS_DEST_UNINITIALIZED;
1838 }
1839 if (aligned) {
1840 decorators |= ARRAYCOPY_ALIGNED;
1841 }
1842
1843 BasicType type = is_oop ? T_OBJECT : T_INT;
1844 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1845 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1846
1847 // 'from', 'to' and 'count' are now valid
1848 __ movptr(dword_count, count);
1849 __ shrptr(count, 1); // count => qword_count
1850
1851 // Copy from low to high addresses. Use 'to' as scratch.
1852 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1853 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1854 __ negptr(qword_count);
1855 __ jmp(L_copy_bytes);
1856
1857 // Copy trailing qwords
1858 __ BIND(L_copy_8_bytes);
1859 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1860 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1861 __ increment(qword_count);
1862 __ jcc(Assembler::notZero, L_copy_8_bytes);
1863
1864 // Check for and copy trailing dword
1865 __ BIND(L_copy_4_bytes);
1866 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1867 __ jccb(Assembler::zero, L_exit);
1868 __ movl(rax, Address(end_from, 8));
1869 __ movl(Address(end_to, 8), rax);
1870
1871 __ BIND(L_exit);
1872 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1873 restore_arg_regs();
1874 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1875 __ vzeroupper();
1876 __ xorptr(rax, rax); // return 0
1877 __ leave(); // required for proper stackwalking of RuntimeStub frame
1878 __ ret(0);
1879
1880 // Copy in multi-bytes chunks
1881 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1882 __ jmp(L_copy_4_bytes);
1883
1884 return start;
1885 }
1886
1887 // Arguments:
1888 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1889 // ignored
1890 // is_oop - true => oop array, so generate store check code
1891 // name - stub name string
1892 //
1893 // Inputs:
1906 StubCodeMark mark(this, "StubRoutines", name);
1907 address start = __ pc();
1908
1909 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1910 const Register from = rdi; // source array address
1911 const Register to = rsi; // destination array address
1912 const Register count = rdx; // elements count
1913 const Register dword_count = rcx;
1914 const Register qword_count = count;
1915
1916 __ enter(); // required for proper stackwalking of RuntimeStub frame
1917 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1918
1919 if (entry != NULL) {
1920 *entry = __ pc();
1921 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1922 BLOCK_COMMENT("Entry:");
1923 }
1924
1925 array_overlap_test(nooverlap_target, Address::times_4);
1926 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1927 // r9 and r10 may be used to save non-volatile registers
1928
1929 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1930 if (dest_uninitialized) {
1931 decorators |= IS_DEST_UNINITIALIZED;
1932 }
1933 if (aligned) {
1934 decorators |= ARRAYCOPY_ALIGNED;
1935 }
1936
1937 BasicType type = is_oop ? T_OBJECT : T_INT;
1938 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1939 // no registers are destroyed by this call
1940 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1941
1942 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1943 // 'from', 'to' and 'count' are now valid
1944 __ movptr(dword_count, count);
1945 __ shrptr(count, 1); // count => qword_count
1946
1947 // Copy from high to low addresses. Use 'to' as scratch.
1948
1949 // Check for and copy trailing dword
1950 __ testl(dword_count, 1);
1951 __ jcc(Assembler::zero, L_copy_bytes);
1952 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1953 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1954 __ jmp(L_copy_bytes);
1955
1956 // Copy trailing qwords
1957 __ BIND(L_copy_8_bytes);
1958 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1959 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1960 __ decrement(qword_count);
1961 __ jcc(Assembler::notZero, L_copy_8_bytes);
1962
1963 if (is_oop) {
1964 __ jmp(L_exit);
1965 }
1966 restore_arg_regs();
1967 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1968 __ xorptr(rax, rax); // return 0
1969 __ vzeroupper();
1970 __ leave(); // required for proper stackwalking of RuntimeStub frame
1971 __ ret(0);
1972
1973 // Copy in multi-bytes chunks
1974 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1975
1976 __ BIND(L_exit);
1977 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1978 restore_arg_regs();
1979 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1980 __ xorptr(rax, rax); // return 0
1981 __ vzeroupper();
1982 __ leave(); // required for proper stackwalking of RuntimeStub frame
1983 __ ret(0);
1984
1985 return start;
1986 }
1987
1988 // Arguments:
1989 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1990 // ignored
1991 // is_oop - true => oop array, so generate store check code
1992 // name - stub name string
1993 //
1994 // Inputs:
1995 // c_rarg0 - source array address
1996 // c_rarg1 - destination array address
1997 // c_rarg2 - element count, treated as ssize_t, can be zero
1998 //
2009 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2010 const Register from = rdi; // source array address
2011 const Register to = rsi; // destination array address
2012 const Register qword_count = rdx; // elements count
2013 const Register end_from = from; // source array end address
2014 const Register end_to = rcx; // destination array end address
2015 const Register saved_count = r11;
2016 // End pointers are inclusive, and if count is not zero they point
2017 // to the last unit copied: end_to[0] := end_from[0]
2018
2019 __ enter(); // required for proper stackwalking of RuntimeStub frame
2020 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2021 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2022
2023 if (entry != NULL) {
2024 *entry = __ pc();
2025 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2026 BLOCK_COMMENT("Entry:");
2027 }
2028
2029 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2030 // r9 and r10 may be used to save non-volatile registers
2031 // 'from', 'to' and 'qword_count' are now valid
2032
2033 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2034 if (dest_uninitialized) {
2035 decorators |= IS_DEST_UNINITIALIZED;
2036 }
2037 if (aligned) {
2038 decorators |= ARRAYCOPY_ALIGNED;
2039 }
2040
2041 BasicType type = is_oop ? T_OBJECT : T_LONG;
2042 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2043 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2044
2045 // Copy from low to high addresses. Use 'to' as scratch.
2046 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2047 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2048 __ negptr(qword_count);
2049 __ jmp(L_copy_bytes);
2050
2051 // Copy trailing qwords
2052 __ BIND(L_copy_8_bytes);
2053 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2054 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2055 __ increment(qword_count);
2056 __ jcc(Assembler::notZero, L_copy_8_bytes);
2057
2058 if (is_oop) {
2059 __ jmp(L_exit);
2060 } else {
2061 restore_arg_regs();
2062 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2063 __ xorptr(rax, rax); // return 0
2064 __ vzeroupper();
2065 __ leave(); // required for proper stackwalking of RuntimeStub frame
2066 __ ret(0);
2067 }
2068
2069 // Copy in multi-bytes chunks
2070 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2071
2072 __ BIND(L_exit);
2073 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2074 restore_arg_regs();
2075 if (is_oop) {
2076 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2077 } else {
2078 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2079 }
2080 __ vzeroupper();
2081 __ xorptr(rax, rax); // return 0
2082 __ leave(); // required for proper stackwalking of RuntimeStub frame
2083 __ ret(0);
2084
2085 return start;
2086 }
2087
2088 // Arguments:
2089 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2090 // ignored
2091 // is_oop - true => oop array, so generate store check code
2092 // name - stub name string
2093 //
2094 // Inputs:
2102 __ align(CodeEntryAlignment);
2103 StubCodeMark mark(this, "StubRoutines", name);
2104 address start = __ pc();
2105
2106 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2107 const Register from = rdi; // source array address
2108 const Register to = rsi; // destination array address
2109 const Register qword_count = rdx; // elements count
2110 const Register saved_count = rcx;
2111
2112 __ enter(); // required for proper stackwalking of RuntimeStub frame
2113 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2114
2115 if (entry != NULL) {
2116 *entry = __ pc();
2117 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2118 BLOCK_COMMENT("Entry:");
2119 }
2120
2121 array_overlap_test(nooverlap_target, Address::times_8);
2122 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2123 // r9 and r10 may be used to save non-volatile registers
2124 // 'from', 'to' and 'qword_count' are now valid
2125
2126 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2127 if (dest_uninitialized) {
2128 decorators |= IS_DEST_UNINITIALIZED;
2129 }
2130 if (aligned) {
2131 decorators |= ARRAYCOPY_ALIGNED;
2132 }
2133
2134 BasicType type = is_oop ? T_OBJECT : T_LONG;
2135 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2136 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2137
2138 __ jmp(L_copy_bytes);
2139
2140 // Copy trailing qwords
2141 __ BIND(L_copy_8_bytes);
2142 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2143 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2144 __ decrement(qword_count);
2145 __ jcc(Assembler::notZero, L_copy_8_bytes);
2146
2147 if (is_oop) {
2148 __ jmp(L_exit);
2149 } else {
2150 restore_arg_regs();
2151 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2152 __ xorptr(rax, rax); // return 0
2153 __ vzeroupper();
2154 __ leave(); // required for proper stackwalking of RuntimeStub frame
2155 __ ret(0);
2156 }
2157
2158 // Copy in multi-bytes chunks
2159 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2160
2161 __ BIND(L_exit);
2162 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2163 restore_arg_regs();
2164 if (is_oop) {
2165 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2166 } else {
2167 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2168 }
2169 __ vzeroupper();
2170 __ xorptr(rax, rax); // return 0
2171 __ leave(); // required for proper stackwalking of RuntimeStub frame
2172 __ ret(0);
2173
2174 return start;
2175 }
2176
2177
2178 // Helper for generating a dynamic type check.
2179 // Smashes no registers.
2180 void generate_type_check(Register sub_klass,
2181 Register super_check_offset,
2182 Register super_klass,
2183 Label& L_success) {
2259 #endif //ASSERT
2260
2261 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
2262 // ckoff => rcx, ckval => r8
2263 // r9 and r10 may be used to save non-volatile registers
2264 #ifdef _WIN64
2265 // last argument (#4) is on stack on Win64
2266 __ movptr(ckval, Address(rsp, 6 * wordSize));
2267 #endif
2268
2269 // Caller of this entry point must set up the argument registers.
2270 if (entry != NULL) {
2271 *entry = __ pc();
2272 BLOCK_COMMENT("Entry:");
2273 }
2274
2275 // allocate spill slots for r13, r14
2276 enum {
2277 saved_r13_offset,
2278 saved_r14_offset,
2279 saved_rbp_offset
2280 };
2281 __ subptr(rsp, saved_rbp_offset * wordSize);
2282 __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
2283 __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
2284
2285 // check that int operands are properly extended to size_t
2286 assert_clean_int(length, rax);
2287 assert_clean_int(ckoff, rax);
2288
2289 #ifdef ASSERT
2290 BLOCK_COMMENT("assert consistent ckoff/ckval");
2291 // The ckoff and ckval must be mutually consistent,
2292 // even though caller generates both.
2293 { Label L;
2294 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2295 __ cmpl(ckoff, Address(ckval, sco_offset));
2296 __ jcc(Assembler::equal, L);
2297 __ stop("super_check_offset inconsistent");
2298 __ bind(L);
2299 }
2300 #endif //ASSERT
2301
2302 // Loop-invariant addresses. They are exclusive end pointers.
2303 Address end_from_addr(from, length, TIMES_OOP, 0);
2355 // and report their number to the caller.
2356 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2357 Label L_post_barrier;
2358 __ addptr(r14_length, count); // K = (original - remaining) oops
2359 __ movptr(rax, r14_length); // save the value
2360 __ notptr(rax); // report (-1^K) to caller (does not affect flags)
2361 __ jccb(Assembler::notZero, L_post_barrier);
2362 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2363
2364 // Come here on success only.
2365 __ BIND(L_do_card_marks);
2366 __ xorptr(rax, rax); // return 0 on success
2367
2368 __ BIND(L_post_barrier);
2369 bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2370
2371 // Common exit point (success or failure).
2372 __ BIND(L_done);
2373 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2374 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2375 restore_arg_regs();
2376 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2377 __ leave(); // required for proper stackwalking of RuntimeStub frame
2378 __ ret(0);
2379
2380 return start;
2381 }
2382
2383 //
2384 // Generate 'unsafe' array copy stub
2385 // Though just as safe as the other stubs, it takes an unscaled
2386 // size_t argument instead of an element count.
2387 //
2388 // Input:
2389 // c_rarg0 - source array address
2390 // c_rarg1 - destination array address
2391 // c_rarg2 - byte count, treated as ssize_t, can be zero
2392 //
2393 // Examines the alignment of the operands and dispatches
2394 // to a long, int, short, or byte copy loop.
|
1154 __ cmpptr(to, end_from);
1155 __ jump_cc(Assembler::aboveEqual, no_overlap);
1156 } else {
1157 __ jcc(Assembler::belowEqual, (*NOLp));
1158 __ cmpptr(to, end_from);
1159 __ jcc(Assembler::aboveEqual, (*NOLp));
1160 }
1161 }
1162
1163 // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1164 //
1165 // Outputs:
1166 // rdi <- rcx (c_rarg0)
1167 // rsi <- rdx (c_rarg1)
1168 // rdx <- r8 (c_rarg2)
1169 // rcx <- r9 (c_rarg3, only when nargs == 4)
1170 //
1171 // Registers r9 and r10 are used to save rdi and rsi, which are
1172 // non-volatile on Windows. r9 and r10 should not be used by the caller.
1173 // Pair every call with restore_arg_regs() before returning.
// Debug-only flag recording which save scheme is active, so each restore
// routine can assert it is paired with the matching setup_* call.
1174 DEBUG_ONLY(bool regs_in_thread;)
1175
1176 void setup_arg_regs(int nargs = 3) {
1177 const Register saved_rdi = r9;
1178 const Register saved_rsi = r10;
1179 assert(nargs == 3 || nargs == 4, "else fix");
1180 #ifdef _WIN64
1181 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1182 "unexpected argument registers");
// c_rarg3 arrives in r9, which doubles as saved_rdi below; park it in rax
// first so it survives until rcx is free to receive it.
1183 if (nargs >= 4)
1184 __ mov(rax, r9); // r9 is also saved_rdi
1185 __ movptr(saved_rdi, rdi);
1186 __ movptr(saved_rsi, rsi);
1187 __ mov(rdi, rcx); // c_rarg0
1188 __ mov(rsi, rdx); // c_rarg1
1189 __ mov(rdx, r8); // c_rarg2
1190 if (nargs >= 4)
1191 __ mov(rcx, rax); // c_rarg3 (via rax)
1192 #else
// On System V platforms the incoming argument registers already match the
// expected rdi/rsi/rdx/rcx layout, so no shuffle code is emitted.
1193 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1194 "unexpected argument registers");
1195 #endif
// Mark the register-based scheme active; restore_arg_regs() asserts this.
1196 DEBUG_ONLY(regs_in_thread = false;)
1197 }
1198
1199 void restore_arg_regs() {
1200 assert(!regs_in_thread, "wrong call to restore_arg_regs");
1201 const Register saved_rdi = r9;
1202 const Register saved_rsi = r10;
1203 #ifdef _WIN64
1204 __ movptr(rdi, saved_rdi);
1205 __ movptr(rsi, saved_rsi);
1206 #endif
1207 }
1208
1209 // This is used in places where r10 is a scratch register, and can
1210 // be adapted if r9 is needed also.
// Variant of setup_arg_regs() that saves rdi/rsi in per-thread JavaThread
// fields instead of r10, leaving r10 free for the caller. r9 is still
// clobbered (it holds the saved r15), so this variant only shuffles the
// first three args; pair with restore_arg_regs_using_thread().
1211 void setup_arg_regs_using_thread() {
1212 const Register saved_r15 = r9;
1213 #ifdef _WIN64
// Save r15 before get_thread() overwrites it with the JavaThread*.
1214 __ mov(saved_r15, r15); // r15 is callee saved and needs to be restored
1215 __ get_thread(r15_thread);
1216 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1217 "unexpected argument registers");
// Stash the non-volatile rdi/rsi in thread-local save slots rather than
// in registers, so both r10 and (eventually) r9 stay available.
1218 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())), rdi);
1219 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())), rsi);
1220
1221 __ mov(rdi, rcx); // c_rarg0
1222 __ mov(rsi, rdx); // c_rarg1
1223 __ mov(rdx, r8); // c_rarg2
1224 #else
// On System V platforms the incoming argument registers already match the
// expected rdi/rsi/rdx layout, so no shuffle code is emitted.
1225 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1226 "unexpected argument registers");
1227 #endif
// Mark the thread-based scheme active; the matching restore asserts this.
1228 DEBUG_ONLY(regs_in_thread = true;)
1229 }
1230
// Undo setup_arg_regs_using_thread(): on Windows, reload rdi/rsi from
// their JavaThread save slots, then restore the callee-saved r15 from r9.
// A no-op (beyond the assert) on other platforms.
1231 void restore_arg_regs_using_thread() {
1232 assert(regs_in_thread, "wrong call to restore_arg_regs");
1233 const Register saved_r15 = r9;
1234 #ifdef _WIN64
// Re-derive the JavaThread* first; r15_thread was clobbered by stub code.
1235 __ get_thread(r15_thread);
1236 __ movptr(rsi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())));
1237 __ movptr(rdi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())));
// Restore r15 last — it is needed as r15_thread for the loads above.
1238 __ mov(r15, saved_r15); // r15 is callee saved and needs to be restored
1239 #endif
1240 }
1241
1242 // Copy big chunks forward
1243 //
1244 // Inputs:
1245 // end_from - source array's end address
1246 // end_to - destination array end address
1247 // qword_count - 64-bits element count, negative
1248 // to - scratch
1249 // L_copy_bytes - entry label
1250 // L_copy_8_bytes - exit label
1251 //
1252 void copy_bytes_forward(Register end_from, Register end_to,
1253 Register qword_count, Register to,
1254 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1255 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1256 Label L_loop;
1257 __ align(OptoLoopAlignment);
1258 if (UseUnalignedLoadStores) {
1259 Label L_end;
1260 if (UseAVX > 2) {
1848 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1849 const Register from = rdi; // source array address
1850 const Register to = rsi; // destination array address
1851 const Register count = rdx; // elements count
1852 const Register dword_count = rcx;
1853 const Register qword_count = count;
1854 const Register end_from = from; // source array end address
1855 const Register end_to = to; // destination array end address
1856 // End pointers are inclusive, and if count is not zero they point
1857 // to the last unit copied: end_to[0] := end_from[0]
1858
1859 __ enter(); // required for proper stackwalking of RuntimeStub frame
1860 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1861
1862 if (entry != NULL) {
1863 *entry = __ pc();
1864 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1865 BLOCK_COMMENT("Entry:");
1866 }
1867
1868 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1869 // r9 is used to save r15_thread
1870
1871 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1872 if (dest_uninitialized) {
1873 decorators |= IS_DEST_UNINITIALIZED;
1874 }
1875 if (aligned) {
1876 decorators |= ARRAYCOPY_ALIGNED;
1877 }
1878
1879 BasicType type = is_oop ? T_OBJECT : T_INT;
1880 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1881 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1882
1883 // 'from', 'to' and 'count' are now valid
1884 __ movptr(dword_count, count);
1885 __ shrptr(count, 1); // count => qword_count
1886
1887 // Copy from low to high addresses. Use 'to' as scratch.
1888 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1889 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1890 __ negptr(qword_count);
1891 __ jmp(L_copy_bytes);
1892
1893 // Copy trailing qwords
1894 __ BIND(L_copy_8_bytes);
1895 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1896 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1897 __ increment(qword_count);
1898 __ jcc(Assembler::notZero, L_copy_8_bytes);
1899
1900 // Check for and copy trailing dword
1901 __ BIND(L_copy_4_bytes);
1902 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1903 __ jccb(Assembler::zero, L_exit);
1904 __ movl(rax, Address(end_from, 8));
1905 __ movl(Address(end_to, 8), rax);
1906
1907 __ BIND(L_exit);
1908 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1909 restore_arg_regs_using_thread();
1910 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1911 __ vzeroupper();
1912 __ xorptr(rax, rax); // return 0
1913 __ leave(); // required for proper stackwalking of RuntimeStub frame
1914 __ ret(0);
1915
1916 // Copy in multi-bytes chunks
1917 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1918 __ jmp(L_copy_4_bytes);
1919
1920 return start;
1921 }
1922
1923 // Arguments:
1924 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1925 // ignored
1926 // is_oop - true => oop array, so generate store check code
1927 // name - stub name string
1928 //
1929 // Inputs:
1942 StubCodeMark mark(this, "StubRoutines", name);
1943 address start = __ pc();
1944
1945 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1946 const Register from = rdi; // source array address
1947 const Register to = rsi; // destination array address
1948 const Register count = rdx; // elements count
1949 const Register dword_count = rcx;
1950 const Register qword_count = count;
1951
1952 __ enter(); // required for proper stackwalking of RuntimeStub frame
1953 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1954
1955 if (entry != NULL) {
1956 *entry = __ pc();
1957 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1958 BLOCK_COMMENT("Entry:");
1959 }
1960
1961 array_overlap_test(nooverlap_target, Address::times_4);
1962 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1963 // r9 is used to save r15_thread
1964
1965 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1966 if (dest_uninitialized) {
1967 decorators |= IS_DEST_UNINITIALIZED;
1968 }
1969 if (aligned) {
1970 decorators |= ARRAYCOPY_ALIGNED;
1971 }
1972
1973 BasicType type = is_oop ? T_OBJECT : T_INT;
1974 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1975 // no registers are destroyed by this call
1976 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1977
1978 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1979 // 'from', 'to' and 'count' are now valid
1980 __ movptr(dword_count, count);
1981 __ shrptr(count, 1); // count => qword_count
1982
1983 // Copy from high to low addresses. Use 'to' as scratch.
1984
1985 // Check for and copy trailing dword
1986 __ testl(dword_count, 1);
1987 __ jcc(Assembler::zero, L_copy_bytes);
1988 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1989 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1990 __ jmp(L_copy_bytes);
1991
1992 // Copy trailing qwords
1993 __ BIND(L_copy_8_bytes);
1994 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1995 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1996 __ decrement(qword_count);
1997 __ jcc(Assembler::notZero, L_copy_8_bytes);
1998
1999 if (is_oop) {
2000 __ jmp(L_exit);
2001 }
2002 restore_arg_regs_using_thread();
2003 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2004 __ xorptr(rax, rax); // return 0
2005 __ vzeroupper();
2006 __ leave(); // required for proper stackwalking of RuntimeStub frame
2007 __ ret(0);
2008
2009 // Copy in multi-bytes chunks
2010 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2011
2012 __ BIND(L_exit);
2013 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2014 restore_arg_regs_using_thread();
2015 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2016 __ xorptr(rax, rax); // return 0
2017 __ vzeroupper();
2018 __ leave(); // required for proper stackwalking of RuntimeStub frame
2019 __ ret(0);
2020
2021 return start;
2022 }
2023
2024 // Arguments:
2025 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2026 // ignored
2027 // is_oop - true => oop array, so generate store check code
2028 // name - stub name string
2029 //
2030 // Inputs:
2031 // c_rarg0 - source array address
2032 // c_rarg1 - destination array address
2033 // c_rarg2 - element count, treated as ssize_t, can be zero
2034 //
2045 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2046 const Register from = rdi; // source array address
2047 const Register to = rsi; // destination array address
2048 const Register qword_count = rdx; // elements count
2049 const Register end_from = from; // source array end address
2050 const Register end_to = rcx; // destination array end address
2051 const Register saved_count = r11;
2052 // End pointers are inclusive, and if count is not zero they point
2053 // to the last unit copied: end_to[0] := end_from[0]
2054
2055 __ enter(); // required for proper stackwalking of RuntimeStub frame
2056 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2057 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2058
2059 if (entry != NULL) {
2060 *entry = __ pc();
2061 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2062 BLOCK_COMMENT("Entry:");
2063 }
2064
2065 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2066 // r9 is used to save r15_thread
2067 // 'from', 'to' and 'qword_count' are now valid
2068
2069 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2070 if (dest_uninitialized) {
2071 decorators |= IS_DEST_UNINITIALIZED;
2072 }
2073 if (aligned) {
2074 decorators |= ARRAYCOPY_ALIGNED;
2075 }
2076
2077 BasicType type = is_oop ? T_OBJECT : T_LONG;
2078 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2079 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2080
2081 // Copy from low to high addresses. Use 'to' as scratch.
2082 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2083 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2084 __ negptr(qword_count);
2085 __ jmp(L_copy_bytes);
2086
2087 // Copy trailing qwords
2088 __ BIND(L_copy_8_bytes);
2089 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2090 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2091 __ increment(qword_count);
2092 __ jcc(Assembler::notZero, L_copy_8_bytes);
2093
2094 if (is_oop) {
2095 __ jmp(L_exit);
2096 } else {
2097 restore_arg_regs_using_thread();
2098 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2099 __ xorptr(rax, rax); // return 0
2100 __ vzeroupper();
2101 __ leave(); // required for proper stackwalking of RuntimeStub frame
2102 __ ret(0);
2103 }
2104
2105 // Copy in multi-bytes chunks
2106 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2107
2108 __ BIND(L_exit);
2109 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2110 restore_arg_regs_using_thread();
2111 if (is_oop) {
2112 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2113 } else {
2114 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2115 }
2116 __ vzeroupper();
2117 __ xorptr(rax, rax); // return 0
2118 __ leave(); // required for proper stackwalking of RuntimeStub frame
2119 __ ret(0);
2120
2121 return start;
2122 }
2123
2124 // Arguments:
2125 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2126 // ignored
2127 // is_oop - true => oop array, so generate store check code
2128 // name - stub name string
2129 //
2130 // Inputs:
2138 __ align(CodeEntryAlignment);
2139 StubCodeMark mark(this, "StubRoutines", name);
2140 address start = __ pc();
2141
2142 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2143 const Register from = rdi; // source array address
2144 const Register to = rsi; // destination array address
2145 const Register qword_count = rdx; // elements count
2146 const Register saved_count = rcx;
2147
2148 __ enter(); // required for proper stackwalking of RuntimeStub frame
2149 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2150
2151 if (entry != NULL) {
2152 *entry = __ pc();
2153 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2154 BLOCK_COMMENT("Entry:");
2155 }
2156
2157 array_overlap_test(nooverlap_target, Address::times_8);
2158 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2159 // r9 is used to save r15_thread
2160 // 'from', 'to' and 'qword_count' are now valid
2161
2162 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2163 if (dest_uninitialized) {
2164 decorators |= IS_DEST_UNINITIALIZED;
2165 }
2166 if (aligned) {
2167 decorators |= ARRAYCOPY_ALIGNED;
2168 }
2169
2170 BasicType type = is_oop ? T_OBJECT : T_LONG;
2171 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2172 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2173
2174 __ jmp(L_copy_bytes);
2175
2176 // Copy trailing qwords
2177 __ BIND(L_copy_8_bytes);
2178 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2179 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2180 __ decrement(qword_count);
2181 __ jcc(Assembler::notZero, L_copy_8_bytes);
2182
2183 if (is_oop) {
2184 __ jmp(L_exit);
2185 } else {
2186 restore_arg_regs_using_thread();
2187 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2188 __ xorptr(rax, rax); // return 0
2189 __ vzeroupper();
2190 __ leave(); // required for proper stackwalking of RuntimeStub frame
2191 __ ret(0);
2192 }
2193
2194 // Copy in multi-bytes chunks
2195 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2196
2197 __ BIND(L_exit);
2198 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2199 restore_arg_regs_using_thread();
2200 if (is_oop) {
2201 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2202 } else {
2203 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2204 }
2205 __ vzeroupper();
2206 __ xorptr(rax, rax); // return 0
2207 __ leave(); // required for proper stackwalking of RuntimeStub frame
2208 __ ret(0);
2209
2210 return start;
2211 }
2212
2213
2214 // Helper for generating a dynamic type check.
2215 // Smashes no registers.
2216 void generate_type_check(Register sub_klass,
2217 Register super_check_offset,
2218 Register super_klass,
2219 Label& L_success) {
#endif //ASSERT

    // Shuffle incoming C-ABI argument registers into the Linux/Solaris
    // locations (see setup_arg_regs); on Win64 this also saves the
    // non-volatile rdi/rsi into r9/r10.
    setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
                       // ckoff => rcx, ckval => r8
                       // r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
    // last argument (#4) is on stack on Win64
    __ movptr(ckval, Address(rsp, 6 * wordSize));
#endif

    // Caller of this entry point must set up the argument registers.
    if (entry != NULL) {
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    // allocate spill slots for r13, r14
    // The final enumerator (saved_rbp_offset) doubles as the number of
    // words reserved below; rbp itself was already pushed by enter().
    enum {
      saved_r13_offset,
      saved_r14_offset,
      saved_r10_offset,
      saved_rbp_offset
    };
    __ subptr(rsp, saved_rbp_offset * wordSize);
    __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
    __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
    __ movptr(Address(rsp, saved_r10_offset * wordSize), r10);

#ifdef ASSERT
    // Debug-only sanity check: r15 must still hold the current thread.
    Label L2;
    __ get_thread(r14);
    __ cmpptr(r15_thread, r14);
    __ jcc(Assembler::equal, L2);
    __ stop("StubRoutines::call_stub: r15_thread is modified by call");
    __ bind(L2);
#endif // ASSERT

    // check that int operands are properly extended to size_t
    assert_clean_int(length, rax);
    assert_clean_int(ckoff, rax);

#ifdef ASSERT
    BLOCK_COMMENT("assert consistent ckoff/ckval");
    // The ckoff and ckval must be mutually consistent,
    // even though caller generates both.
    { Label L;
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ cmpl(ckoff, Address(ckval, sco_offset));
      __ jcc(Assembler::equal, L);
      __ stop("super_check_offset inconsistent");
      __ bind(L);
    }
#endif //ASSERT

    // Loop-invariant addresses. They are exclusive end pointers.
    Address end_from_addr(from, length, TIMES_OOP, 0);
    // and report their number to the caller.
    // (Failure path: compute how many oops were actually copied before the
    // type check failed. Registers must all be distinct so the arithmetic
    // below does not clobber live values.)
    assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
    Label L_post_barrier;
    __ addptr(r14_length, count); // K = (original - remaining) oops
    __ movptr(rax, r14_length); // save the value
    __ notptr(rax); // report (-1^K) to caller (does not affect flags)
    // Flags still reflect the addptr above: K != 0 means some oops were
    // copied and the post barrier must run for them.
    __ jccb(Assembler::notZero, L_post_barrier);
    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier

    // Come here on success only.
    __ BIND(L_do_card_marks);
    __ xorptr(rax, rax); // return 0 on success

    __ BIND(L_post_barrier);
    // GC epilogue (e.g. card marks) covering the r14_length oops written.
    bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);

    // Common exit point (success or failure).
    __ BIND(L_done);
    // Restore the spilled callee-saved registers, then undo the Win64
    // argument-register shuffle.
    __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
    __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
    __ movptr(r10, Address(rsp, saved_r10_offset * wordSize));
    restore_arg_regs();
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
2430
2431 //
2432 // Generate 'unsafe' array copy stub
2433 // Though just as safe as the other stubs, it takes an unscaled
2434 // size_t argument instead of an element count.
2435 //
2436 // Input:
2437 // c_rarg0 - source array address
2438 // c_rarg1 - destination array address
2439 // c_rarg2 - byte count, treated as ssize_t, can be zero
2440 //
2441 // Examines the alignment of the operands and dispatches
2442 // to a long, int, short, or byte copy loop.
|