8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "ci/ciUtilities.hpp"
28 #include "gc/shared/cardTable.hpp"
29 #include "gc/shared/cardTableBarrierSet.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "nativeInst_x86.hpp"
32 #include "oops/instanceOop.hpp"
33 #include "oops/method.hpp"
34 #include "oops/objArrayKlass.hpp"
35 #include "oops/oop.inline.hpp"
36 #include "prims/methodHandles.hpp"
37 #include "runtime/frame.inline.hpp"
38 #include "runtime/handles.inline.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubCodeGenerator.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "runtime/thread.inline.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
46
47 // Declaration and definition of StubGenerator (no .hpp file).
48 // For a more detailed description of the stub routine structure
|
8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "ci/ciUtilities.hpp"
28 #include "gc/shared/barrierSet.hpp"
29 #include "gc/shared/barrierSetAssembler.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "nativeInst_x86.hpp"
32 #include "oops/instanceOop.hpp"
33 #include "oops/method.hpp"
34 #include "oops/objArrayKlass.hpp"
35 #include "oops/oop.inline.hpp"
36 #include "prims/methodHandles.hpp"
37 #include "runtime/frame.inline.hpp"
38 #include "runtime/handles.inline.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubCodeGenerator.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "runtime/thread.inline.hpp"
43 #ifdef COMPILER2
44 #include "opto/runtime.hpp"
45 #endif
46
47 // Declaration and definition of StubGenerator (no .hpp file).
48 // For a more detailed description of the stub routine structure
|
1172 __ mov(rdi, rcx); // c_rarg0
1173 __ mov(rsi, rdx); // c_rarg1
1174 __ mov(rdx, r8); // c_rarg2
1175 if (nargs >= 4)
1176 __ mov(rcx, rax); // c_rarg3 (via rax)
1177 #else
1178 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1179 "unexpected argument registers");
1180 #endif
1181 }
1182
1183 void restore_arg_regs() {
1184 const Register saved_rdi = r9;
1185 const Register saved_rsi = r10;
1186 #ifdef _WIN64
1187 __ movptr(rdi, saved_rdi);
1188 __ movptr(rsi, saved_rsi);
1189 #endif
1190 }
1191
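  // Editor's note: setup_arg_regs()/restore_arg_regs() bridge the two x86-64
  // calling conventions. Win64 passes the first four integer arguments in
  // rcx, rdx, r8, r9, while the stub bodies are written against the SysV
  // order rdi, rsi, rdx, rcx. Because r9 and r10 double as save slots for
  // the callee-saved rdi/rsi on Windows, a fourth argument cannot remain in
  // r9 and is staged through rax by the caller, which is why the shuffle
  // above reads "c_rarg3 (via rax)".
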
1192 // Generate code for an array write pre barrier
1193 //
 1194 //     addr               - starting address
 1195 //     count              - element count
 1196 //     dest_uninitialized - true => destination holds no previous oops (skip the pre barrier)
1197 //
1198 // Destroy no registers!
1199 //
1200 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
1201 BarrierSet* bs = Universe::heap()->barrier_set();
1202 switch (bs->kind()) {
1203 case BarrierSet::G1BarrierSet:
 1204       // With G1, don't generate the call if we statically know that the target is uninitialized
1205 if (!dest_uninitialized) {
1206 Label filtered;
1207 Address in_progress(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() +
1208 SATBMarkQueue::byte_offset_of_active()));
1209 // Is marking active?
1210 if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
1211 __ cmpl(in_progress, 0);
1212 } else {
1213 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
1214 __ cmpb(in_progress, 0);
1215 }
1216 __ jcc(Assembler::equal, filtered);
1217
1218 __ pusha(); // push registers
1219 if (count == c_rarg0) {
1220 if (addr == c_rarg1) {
1221 // exactly backwards!!
1222 __ xchgptr(c_rarg1, c_rarg0);
1223 } else {
1224 __ movptr(c_rarg1, count);
1225 __ movptr(c_rarg0, addr);
1226 }
1227 } else {
1228 __ movptr(c_rarg0, addr);
1229 __ movptr(c_rarg1, count);
1230 }
1231 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1232 __ popa();
1233
1234 __ bind(filtered);
1235 }
1236 break;
1237 case BarrierSet::CardTableBarrierSet:
1238 break;
1239 default:
1240 ShouldNotReachHere();
1241
1242 }
1243 }
1244
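  // Editor's sketch, illustrative only (not part of either version of this
  // file): the G1 branch above reduces to the following C++ shape. When
  // dest_uninitialized is known at stub-generation time the whole block is
  // omitted; otherwise the VM call is filtered out at runtime unless SATB
  // marking is active.
  static void write_ref_array_pre_model(void** dst, size_t count,
                                        bool marking_active,
                                        void (*pre_barrier)(void**, size_t)) {
    if (!marking_active) return;  // the 'filtered' fast path
    pre_barrier(dst, count);      // models BarrierSet::static_write_ref_array_pre
  }
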
1245 //
1246 // Generate code for an array write post barrier
1247 //
1248 // Input:
1249 // start - register containing starting address of destination array
1250 // count - elements count
1251 // scratch - scratch register
1252 //
1253 // The input registers are overwritten.
1254 //
1255 void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
1256 assert_different_registers(start, count, scratch);
1257 BarrierSet* bs = Universe::heap()->barrier_set();
1258 switch (bs->kind()) {
1259 case BarrierSet::G1BarrierSet:
1260 {
1261 __ pusha(); // push registers (overkill)
1262 if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
1263 assert_different_registers(c_rarg1, start);
1264 __ mov(c_rarg1, count);
1265 __ mov(c_rarg0, start);
1266 } else {
1267 assert_different_registers(c_rarg0, count);
1268 __ mov(c_rarg0, start);
1269 __ mov(c_rarg1, count);
1270 }
1271 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
1272 __ popa();
1273 }
1274 break;
1275 case BarrierSet::CardTableBarrierSet:
1276 {
1277 Label L_loop, L_done;
1278 const Register end = count;
1279
1280 __ testl(count, count);
1281 __ jcc(Assembler::zero, L_done); // zero count - nothing to do
1282
1283 __ leaq(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size
1284 __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
1285 __ shrptr(start, CardTable::card_shift);
1286 __ shrptr(end, CardTable::card_shift);
1287 __ subptr(end, start); // end --> cards count
1288
1289 int64_t disp = ci_card_table_address_as<int64_t>();
1290 __ mov64(scratch, disp);
1291 __ addptr(start, scratch);
1292 __ BIND(L_loop);
1293 __ movb(Address(start, count, Address::times_1), 0);
1294 __ decrement(count);
1295 __ jcc(Assembler::greaterEqual, L_loop);
1296 __ BIND(L_done);
1297 }
1298 break;
1299 default:
1300 ShouldNotReachHere();
1301
1302 }
1303 }
1304
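  // Editor's sketch, illustrative only: the card-table arm above marks every
  // card spanned by the destination range, assuming 512-byte cards
  // (card_shift == 9), 8-byte heap oops, and a byte_map_base pre-biased so
  // that (address >> card_shift) indexes it directly. The last oop's address
  // is made inclusive before shifting so a range ending exactly on a card
  // boundary does not dirty one card too many.
  static void dirty_cards_model(volatile char* byte_map_base, size_t start,
                                size_t count_oops) {
    if (count_oops == 0) return;                 // matches the L_done early-out
    const size_t card_shift = 9, oop_size = 8;   // assumed geometry
    size_t last = start + count_oops * oop_size - oop_size;  // last oop, inclusive
    for (size_t card = start >> card_shift; card <= (last >> card_shift); card++) {
      byte_map_base[card] = 0;                   // 0 == dirty card
    }
  }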
1305
1306 // Copy big chunks forward
1307 //
1308 // Inputs:
 1309   //   end_from     - source array's end address
 1310   //   end_to       - destination array's end address
 1311   //   qword_count  - 64-bit element count, negative
1312 // to - scratch
1313 // L_copy_bytes - entry label
1314 // L_copy_8_bytes - exit label
1315 //
1316 void copy_bytes_forward(Register end_from, Register end_to,
1317 Register qword_count, Register to,
1318 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1319 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1320 Label L_loop;
1321 __ align(OptoLoopAlignment);
1322 if (UseUnalignedLoadStores) {
1323 Label L_end;
|
1172 __ mov(rdi, rcx); // c_rarg0
1173 __ mov(rsi, rdx); // c_rarg1
1174 __ mov(rdx, r8); // c_rarg2
1175 if (nargs >= 4)
1176 __ mov(rcx, rax); // c_rarg3 (via rax)
1177 #else
1178 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1179 "unexpected argument registers");
1180 #endif
1181 }
1182
1183 void restore_arg_regs() {
1184 const Register saved_rdi = r9;
1185 const Register saved_rsi = r10;
1186 #ifdef _WIN64
1187 __ movptr(rdi, saved_rdi);
1188 __ movptr(rsi, saved_rsi);
1189 #endif
1190 }
1191
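  // Editor's note: in the refactored file the gen_write_ref_array_pre_barrier
  // and gen_write_ref_array_post_barrier helpers shown in the old hunk are
  // deleted outright; their work now happens behind the collector-provided
  // BarrierSetAssembler::arraycopy_prologue()/arraycopy_epilogue() hooks used
  // by the copy stubs below.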
1192
1193 // Copy big chunks forward
1194 //
1195 // Inputs:
 1196   //   end_from     - source array's end address
 1197   //   end_to       - destination array's end address
 1198   //   qword_count  - 64-bit element count, negative
1199 // to - scratch
1200 // L_copy_bytes - entry label
1201 // L_copy_8_bytes - exit label
1202 //
1203 void copy_bytes_forward(Register end_from, Register end_to,
1204 Register qword_count, Register to,
1205 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1206 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1207 Label L_loop;
1208 __ align(OptoLoopAlignment);
1209 if (UseUnalignedLoadStores) {
1210 Label L_end;
|
 1900   // cache line boundaries will still be loaded and stored atomically.
1901 //
1902 // Side Effects:
1903 // disjoint_int_copy_entry is set to the no-overlap entry point
1904 // used by generate_conjoint_int_oop_copy().
1905 //
1906 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1907 const char *name, bool dest_uninitialized = false) {
1908 __ align(CodeEntryAlignment);
1909 StubCodeMark mark(this, "StubRoutines", name);
1910 address start = __ pc();
1911
1912 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1913 const Register from = rdi; // source array address
1914 const Register to = rsi; // destination array address
1915 const Register count = rdx; // elements count
1916 const Register dword_count = rcx;
1917 const Register qword_count = count;
1918 const Register end_from = from; // source array end address
1919 const Register end_to = to; // destination array end address
1920 const Register saved_to = r11; // saved destination array address
1921 // End pointers are inclusive, and if count is not zero they point
1922 // to the last unit copied: end_to[0] := end_from[0]
1923
1924 __ enter(); // required for proper stackwalking of RuntimeStub frame
1925 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1926
1927 if (entry != NULL) {
1928 *entry = __ pc();
1929 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1930 BLOCK_COMMENT("Entry:");
1931 }
1932
1933 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1934 // r9 and r10 may be used to save non-volatile registers
1935 if (is_oop) {
1936 __ movq(saved_to, to);
1937 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1938 }
1939
1940 // 'from', 'to' and 'count' are now valid
1941 __ movptr(dword_count, count);
1942 __ shrptr(count, 1); // count => qword_count
1943
1944 // Copy from low to high addresses. Use 'to' as scratch.
1945 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1946 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1947 __ negptr(qword_count);
1948 __ jmp(L_copy_bytes);
1949
1950 // Copy trailing qwords
1951 __ BIND(L_copy_8_bytes);
1952 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1953 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1954 __ increment(qword_count);
1955 __ jcc(Assembler::notZero, L_copy_8_bytes);
1956
1957 // Check for and copy trailing dword
1958 __ BIND(L_copy_4_bytes);
1959 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1960 __ jccb(Assembler::zero, L_exit);
1961 __ movl(rax, Address(end_from, 8));
1962 __ movl(Address(end_to, 8), rax);
1963
1964 __ BIND(L_exit);
1965 if (is_oop) {
1966 gen_write_ref_array_post_barrier(saved_to, dword_count, rax);
1967 }
1968 restore_arg_regs();
1969 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1970 __ vzeroupper();
1971 __ xorptr(rax, rax); // return 0
1972 __ leave(); // required for proper stackwalking of RuntimeStub frame
1973 __ ret(0);
1974
 1975     // Copy in multi-byte chunks
1976 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1977 __ jmp(L_copy_4_bytes);
1978
1979 return start;
1980 }
1981
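  // Editor's sketch, illustrative only (assumes 8-byte-aligned arrays; the
  // real stub needs no such restriction since x86 tolerates unaligned
  // accesses): the copy shape generated above. Pairs of ints move as 8-byte
  // qwords using the negative-index idiom, where end pointers are biased
  // past the data and a negative count is incremented toward zero, so a
  // single increment is both the induction step and the exit test.
  static void disjoint_int_copy_model(const int* from, int* to, size_t dwords) {
    long long qwords = (long long)(dwords >> 1);
    const long long* end_from = (const long long*)from + qwords;  // one past last qword
    long long*       end_to   = (long long*)to         + qwords;
    for (long long i = -qwords; i != 0; i++) {
      end_to[i] = end_from[i];               // the L_copy_8_bytes loop
    }
    if (dwords & 1) {
      to[dwords - 1] = from[dwords - 1];     // the L_copy_4_bytes tail
    }
  }
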
1982 // Arguments:
1983 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1984 // ignored
1985 // is_oop - true => oop array, so generate store check code
1986 // name - stub name string
|
 1787   // cache line boundaries will still be loaded and stored atomically.
1788 //
1789 // Side Effects:
1790 // disjoint_int_copy_entry is set to the no-overlap entry point
1791 // used by generate_conjoint_int_oop_copy().
1792 //
1793 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1794 const char *name, bool dest_uninitialized = false) {
1795 __ align(CodeEntryAlignment);
1796 StubCodeMark mark(this, "StubRoutines", name);
1797 address start = __ pc();
1798
1799 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1800 const Register from = rdi; // source array address
1801 const Register to = rsi; // destination array address
1802 const Register count = rdx; // elements count
1803 const Register dword_count = rcx;
1804 const Register qword_count = count;
1805 const Register end_from = from; // source array end address
1806 const Register end_to = to; // destination array end address
1807 // End pointers are inclusive, and if count is not zero they point
1808 // to the last unit copied: end_to[0] := end_from[0]
1809
1810 __ enter(); // required for proper stackwalking of RuntimeStub frame
1811 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1812
1813 if (entry != NULL) {
1814 *entry = __ pc();
1815 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1816 BLOCK_COMMENT("Entry:");
1817 }
1818
1819 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1820 // r9 and r10 may be used to save non-volatile registers
1821
1822 DecoratorSet decorators = ARRAYCOPY_DISJOINT;
1823 if (dest_uninitialized) {
1824 decorators |= AS_DEST_NOT_INITIALIZED;
1825 }
1826 if (aligned) {
1827 decorators |= ARRAYCOPY_ALIGNED;
1828 }
1829
1830 BasicType type = is_oop ? T_OBJECT : T_INT;
1831 BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
1832 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1833
1834 // 'from', 'to' and 'count' are now valid
1835 __ movptr(dword_count, count);
1836 __ shrptr(count, 1); // count => qword_count
1837
1838 // Copy from low to high addresses. Use 'to' as scratch.
1839 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1840 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1841 __ negptr(qword_count);
1842 __ jmp(L_copy_bytes);
1843
1844 // Copy trailing qwords
1845 __ BIND(L_copy_8_bytes);
1846 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1847 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1848 __ increment(qword_count);
1849 __ jcc(Assembler::notZero, L_copy_8_bytes);
1850
1851 // Check for and copy trailing dword
1852 __ BIND(L_copy_4_bytes);
1853 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
1854 __ jccb(Assembler::zero, L_exit);
1855 __ movl(rax, Address(end_from, 8));
1856 __ movl(Address(end_to, 8), rax);
1857
1858 __ BIND(L_exit);
1859 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1860 restore_arg_regs();
1861 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1862 __ vzeroupper();
1863 __ xorptr(rax, rax); // return 0
1864 __ leave(); // required for proper stackwalking of RuntimeStub frame
1865 __ ret(0);
1866
 1867     // Copy in multi-byte chunks
1868 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1869 __ jmp(L_copy_4_bytes);
1870
1871 return start;
1872 }
1873
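  // Editor's note: the rewrite above replaces the hand-rolled switch over
  // BarrierSet::kind() with virtual dispatch. The stub now describes the
  // copy statically through DecoratorSet bits (ARRAYCOPY_DISJOINT,
  // ARRAYCOPY_ALIGNED, AS_DEST_NOT_INITIALIZED) and lets the active
  // collector's BarrierSetAssembler decide what code, if any, to emit in
  // arraycopy_prologue()/arraycopy_epilogue(). A new collector then only
  // implements those hooks instead of patching every copy stub.
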
1874 // Arguments:
1875 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1876 // ignored
1877 // is_oop - true => oop array, so generate store check code
1878 // name - stub name string
|
2004 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
2005 const Register from = rdi; // source array address
2006 const Register to = rsi; // destination array address
2007 const Register count = rdx; // elements count
2008 const Register dword_count = rcx;
2009 const Register qword_count = count;
2010
2011 __ enter(); // required for proper stackwalking of RuntimeStub frame
2012 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2013
2014 if (entry != NULL) {
2015 *entry = __ pc();
2016 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2017 BLOCK_COMMENT("Entry:");
2018 }
2019
2020 array_overlap_test(nooverlap_target, Address::times_4);
2021 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2022 // r9 and r10 may be used to save non-volatile registers
2023
2024 if (is_oop) {
2025 // no registers are destroyed by this call
2026 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2027 }
2028
2029 assert_clean_int(count, rax); // Make sure 'count' is clean int.
2030 // 'from', 'to' and 'count' are now valid
2031 __ movptr(dword_count, count);
2032 __ shrptr(count, 1); // count => qword_count
2033
2034 // Copy from high to low addresses. Use 'to' as scratch.
2035
2036 // Check for and copy trailing dword
2037 __ testl(dword_count, 1);
2038 __ jcc(Assembler::zero, L_copy_bytes);
2039 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
2040 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
2041 __ jmp(L_copy_bytes);
2042
2043 // Copy trailing qwords
2044 __ BIND(L_copy_8_bytes);
2045 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2046 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2047 __ decrement(qword_count);
2048 __ jcc(Assembler::notZero, L_copy_8_bytes);
2049
2050 if (is_oop) {
2051 __ jmp(L_exit);
2052 }
2053 restore_arg_regs();
2054 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2055 __ xorptr(rax, rax); // return 0
2056 __ vzeroupper();
2057 __ leave(); // required for proper stackwalking of RuntimeStub frame
2058 __ ret(0);
2059
 2060     // Copy in multi-byte chunks
2061 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2062
2063 __ BIND(L_exit);
2064 if (is_oop) {
2065 gen_write_ref_array_post_barrier(to, dword_count, rax);
2066 }
2067 restore_arg_regs();
2068 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2069 __ xorptr(rax, rax); // return 0
2070 __ vzeroupper();
2071 __ leave(); // required for proper stackwalking of RuntimeStub frame
2072 __ ret(0);
2073
2074 return start;
2075 }
2076
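  // Editor's sketch, illustrative only (again assuming 8-byte alignment):
  // the conjoint variant above copies from high addresses to low, so when
  // the destination overlaps the tail of the source no element is read
  // after it has been overwritten. array_overlap_test has already routed
  // non-overlapping calls to the forward-copying disjoint entry point.
  static void conjoint_int_copy_model(const int* from, int* to, size_t dwords) {
    if (dwords & 1) {
      to[dwords - 1] = from[dwords - 1];     // trailing dword first (highest address)
    }
    for (long long q = (long long)(dwords >> 1); q > 0; q--) {
      ((long long*)to)[q - 1] = ((const long long*)from)[q - 1];  // qwords, high to low
    }
  }
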
2077 // Arguments:
2078 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2079 // ignored
2080 // is_oop - true => oop array, so generate store check code
2081 // name - stub name string
2082 //
2083 // Inputs:
2084 // c_rarg0 - source array address
2085 // c_rarg1 - destination array address
2086 // c_rarg2 - element count, treated as ssize_t, can be zero
2087 //
2088 // Side Effects:
2089 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
2090 // no-overlap entry point used by generate_conjoint_long_oop_copy().
2091 //
2092 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
2093 const char *name, bool dest_uninitialized = false) {
2094 __ align(CodeEntryAlignment);
2095 StubCodeMark mark(this, "StubRoutines", name);
2096 address start = __ pc();
2097
2098 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2099 const Register from = rdi; // source array address
2100 const Register to = rsi; // destination array address
2101 const Register qword_count = rdx; // elements count
2102 const Register end_from = from; // source array end address
2103 const Register end_to = rcx; // destination array end address
2104 const Register saved_to = to;
2105 const Register saved_count = r11;
2106 // End pointers are inclusive, and if count is not zero they point
2107 // to the last unit copied: end_to[0] := end_from[0]
2108
2109 __ enter(); // required for proper stackwalking of RuntimeStub frame
2110 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2111 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2112
2113 if (entry != NULL) {
2114 *entry = __ pc();
2115 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2116 BLOCK_COMMENT("Entry:");
2117 }
2118
2119 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2120 // r9 and r10 may be used to save non-volatile registers
2121 // 'from', 'to' and 'qword_count' are now valid
2122 if (is_oop) {
2123 // Save to and count for store barrier
2124 __ movptr(saved_count, qword_count);
2125 // no registers are destroyed by this call
2126 gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
2127 }
2128
2129 // Copy from low to high addresses. Use 'to' as scratch.
2130 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2131 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2132 __ negptr(qword_count);
2133 __ jmp(L_copy_bytes);
2134
2135 // Copy trailing qwords
2136 __ BIND(L_copy_8_bytes);
2137 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2138 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2139 __ increment(qword_count);
2140 __ jcc(Assembler::notZero, L_copy_8_bytes);
2141
2142 if (is_oop) {
2143 __ jmp(L_exit);
2144 } else {
2145 restore_arg_regs();
2146 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2147 __ xorptr(rax, rax); // return 0
2148 __ vzeroupper();
2149 __ leave(); // required for proper stackwalking of RuntimeStub frame
2150 __ ret(0);
2151 }
2152
 2153     // Copy in multi-byte chunks
2154 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2155
2156 if (is_oop) {
2157 __ BIND(L_exit);
2158 gen_write_ref_array_post_barrier(saved_to, saved_count, rax);
2159 }
2160 restore_arg_regs();
2161 if (is_oop) {
2162 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2163 } else {
2164 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2165 }
2166 __ vzeroupper();
2167 __ xorptr(rax, rax); // return 0
2168 __ leave(); // required for proper stackwalking of RuntimeStub frame
2169 __ ret(0);
2170
2171 return start;
2172 }
2173
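  // Editor's note: in the oop flavor above, end_to lives in rcx so the
  // incoming 'to' (rsi) survives the copy and doubles as saved_to, whereas
  // qword_count is negated and counted back up to zero by the loop, so its
  // original value must be parked in saved_count (r11) up front for
  // gen_write_ref_array_post_barrier at L_exit.
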
2174 // Arguments:
2175 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2176 // ignored
2177 // is_oop - true => oop array, so generate store check code
2178 // name - stub name string
|
1896 Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1897 const Register from = rdi; // source array address
1898 const Register to = rsi; // destination array address
1899 const Register count = rdx; // elements count
1900 const Register dword_count = rcx;
1901 const Register qword_count = count;
1902
1903 __ enter(); // required for proper stackwalking of RuntimeStub frame
1904 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1905
1906 if (entry != NULL) {
1907 *entry = __ pc();
1908 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1909 BLOCK_COMMENT("Entry:");
1910 }
1911
1912 array_overlap_test(nooverlap_target, Address::times_4);
1913 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1914 // r9 and r10 may be used to save non-volatile registers
1915
1916 DecoratorSet decorators = 0;
1917 if (dest_uninitialized) {
1918 decorators |= AS_DEST_NOT_INITIALIZED;
1919 }
1920 if (aligned) {
1921 decorators |= ARRAYCOPY_ALIGNED;
1922 }
1923
1924 BasicType type = is_oop ? T_OBJECT : T_INT;
1925 BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
1926 // no registers are destroyed by this call
1927 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1928
1929 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1930 // 'from', 'to' and 'count' are now valid
1931 __ movptr(dword_count, count);
1932 __ shrptr(count, 1); // count => qword_count
1933
1934 // Copy from high to low addresses. Use 'to' as scratch.
1935
1936 // Check for and copy trailing dword
1937 __ testl(dword_count, 1);
1938 __ jcc(Assembler::zero, L_copy_bytes);
1939 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
1940 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
1941 __ jmp(L_copy_bytes);
1942
1943 // Copy trailing qwords
1944 __ BIND(L_copy_8_bytes);
1945 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1946 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1947 __ decrement(qword_count);
1948 __ jcc(Assembler::notZero, L_copy_8_bytes);
1949
1950 if (is_oop) {
1951 __ jmp(L_exit);
1952 }
1953 restore_arg_regs();
1954 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1955 __ xorptr(rax, rax); // return 0
1956 __ vzeroupper();
1957 __ leave(); // required for proper stackwalking of RuntimeStub frame
1958 __ ret(0);
1959
 1960     // Copy in multi-byte chunks
1961 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1962
1963 __ BIND(L_exit);
1964 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
1965 restore_arg_regs();
1966 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
1967 __ xorptr(rax, rax); // return 0
1968 __ vzeroupper();
1969 __ leave(); // required for proper stackwalking of RuntimeStub frame
1970 __ ret(0);
1971
1972 return start;
1973 }
1974
1975 // Arguments:
1976 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
1977 // ignored
1978 // is_oop - true => oop array, so generate store check code
1979 // name - stub name string
1980 //
1981 // Inputs:
1982 // c_rarg0 - source array address
1983 // c_rarg1 - destination array address
1984 // c_rarg2 - element count, treated as ssize_t, can be zero
1985 //
1986 // Side Effects:
1987 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1988 // no-overlap entry point used by generate_conjoint_long_oop_copy().
1989 //
1990 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
1991 const char *name, bool dest_uninitialized = false) {
1992 __ align(CodeEntryAlignment);
1993 StubCodeMark mark(this, "StubRoutines", name);
1994 address start = __ pc();
1995
1996 Label L_copy_bytes, L_copy_8_bytes, L_exit;
1997 const Register from = rdi; // source array address
1998 const Register to = rsi; // destination array address
1999 const Register qword_count = rdx; // elements count
2000 const Register end_from = from; // source array end address
2001 const Register end_to = rcx; // destination array end address
2002 const Register saved_count = r11;
2003 // End pointers are inclusive, and if count is not zero they point
2004 // to the last unit copied: end_to[0] := end_from[0]
2005
2006 __ enter(); // required for proper stackwalking of RuntimeStub frame
2007 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2008 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2009
2010 if (entry != NULL) {
2011 *entry = __ pc();
2012 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2013 BLOCK_COMMENT("Entry:");
2014 }
2015
2016 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2017 // r9 and r10 may be used to save non-volatile registers
2018 // 'from', 'to' and 'qword_count' are now valid
2019
2020 DecoratorSet decorators = ARRAYCOPY_DISJOINT;
2021 if (dest_uninitialized) {
2022 decorators |= AS_DEST_NOT_INITIALIZED;
2023 }
2024 if (aligned) {
2025 decorators |= ARRAYCOPY_ALIGNED;
2026 }
2027
2028 BasicType type = is_oop ? T_OBJECT : T_LONG;
2029 BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
2030 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2031
2032 // Copy from low to high addresses. Use 'to' as scratch.
2033 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2034 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2035 __ negptr(qword_count);
2036 __ jmp(L_copy_bytes);
2037
2038 // Copy trailing qwords
2039 __ BIND(L_copy_8_bytes);
2040 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2041 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2042 __ increment(qword_count);
2043 __ jcc(Assembler::notZero, L_copy_8_bytes);
2044
2045 if (is_oop) {
2046 __ jmp(L_exit);
2047 } else {
2048 restore_arg_regs();
2049 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2050 __ xorptr(rax, rax); // return 0
2051 __ vzeroupper();
2052 __ leave(); // required for proper stackwalking of RuntimeStub frame
2053 __ ret(0);
2054 }
2055
 2056     // Copy in multi-byte chunks
2057 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2058
2059 __ BIND(L_exit);
2060 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2061 restore_arg_regs();
2062 if (is_oop) {
2063 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2064 } else {
2065 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2066 }
2067 __ vzeroupper();
2068 __ xorptr(rax, rax); // return 0
2069 __ leave(); // required for proper stackwalking of RuntimeStub frame
2070 __ ret(0);
2071
2072 return start;
2073 }
2074
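  // Editor's note: after the refactor the prologue/epilogue pair receives
  // T_OBJECT or T_LONG plus the live from/to/qword_count registers, and
  // preserving whatever must survive the copy loop becomes the hook's job
  // (in the JDK's G1 assembler, for instance, the count is parked in r11
  // inside arraycopy_prologue), so the explicit saved_to bookkeeping
  // disappears from the stub body.
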
2075 // Arguments:
2076 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2077 // ignored
2078 // is_oop - true => oop array, so generate store check code
2079 // name - stub name string
|
2191
2192 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2193 const Register from = rdi; // source array address
2194 const Register to = rsi; // destination array address
2195 const Register qword_count = rdx; // elements count
2196 const Register saved_count = rcx;
2197
2198 __ enter(); // required for proper stackwalking of RuntimeStub frame
2199 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2200
2201 if (entry != NULL) {
2202 *entry = __ pc();
2203 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2204 BLOCK_COMMENT("Entry:");
2205 }
2206
2207 array_overlap_test(nooverlap_target, Address::times_8);
2208 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2209 // r9 and r10 may be used to save non-volatile registers
2210 // 'from', 'to' and 'qword_count' are now valid
2211 if (is_oop) {
2212 // Save to and count for store barrier
2213 __ movptr(saved_count, qword_count);
2214 // No registers are destroyed by this call
2215 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
2216 }
2217
2218 __ jmp(L_copy_bytes);
2219
2220 // Copy trailing qwords
2221 __ BIND(L_copy_8_bytes);
2222 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2223 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2224 __ decrement(qword_count);
2225 __ jcc(Assembler::notZero, L_copy_8_bytes);
2226
2227 if (is_oop) {
2228 __ jmp(L_exit);
2229 } else {
2230 restore_arg_regs();
2231 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2232 __ xorptr(rax, rax); // return 0
2233 __ vzeroupper();
2234 __ leave(); // required for proper stackwalking of RuntimeStub frame
2235 __ ret(0);
2236 }
2237
 2238     // Copy in multi-byte chunks
2239 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2240
2241 if (is_oop) {
2242 __ BIND(L_exit);
2243 gen_write_ref_array_post_barrier(to, saved_count, rax);
2244 }
2245 restore_arg_regs();
2246 if (is_oop) {
2247 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2248 } else {
2249 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2250 }
2251 __ vzeroupper();
2252 __ xorptr(rax, rax); // return 0
2253 __ leave(); // required for proper stackwalking of RuntimeStub frame
2254 __ ret(0);
2255
2256 return start;
2257 }
2258
2259
2260 // Helper for generating a dynamic type check.
2261 // Smashes no registers.
2262 void generate_type_check(Register sub_klass,
2263 Register super_check_offset,
|
2092
2093 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2094 const Register from = rdi; // source array address
2095 const Register to = rsi; // destination array address
2096 const Register qword_count = rdx; // elements count
2097 const Register saved_count = rcx;
2098
2099 __ enter(); // required for proper stackwalking of RuntimeStub frame
2100 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2101
2102 if (entry != NULL) {
2103 *entry = __ pc();
2104 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2105 BLOCK_COMMENT("Entry:");
2106 }
2107
2108 array_overlap_test(nooverlap_target, Address::times_8);
2109 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2110 // r9 and r10 may be used to save non-volatile registers
2111 // 'from', 'to' and 'qword_count' are now valid
2112
2113 DecoratorSet decorators = ARRAYCOPY_DISJOINT;
2114 if (dest_uninitialized) {
2115 decorators |= AS_DEST_NOT_INITIALIZED;
2116 }
2117 if (aligned) {
2118 decorators |= ARRAYCOPY_ALIGNED;
2119 }
2120
2121 BasicType type = is_oop ? T_OBJECT : T_LONG;
2122 BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
2123 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2124
2125 __ jmp(L_copy_bytes);
2126
2127 // Copy trailing qwords
2128 __ BIND(L_copy_8_bytes);
2129 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2130 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2131 __ decrement(qword_count);
2132 __ jcc(Assembler::notZero, L_copy_8_bytes);
2133
2134 if (is_oop) {
2135 __ jmp(L_exit);
2136 } else {
2137 restore_arg_regs();
2138 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2139 __ xorptr(rax, rax); // return 0
2140 __ vzeroupper();
2141 __ leave(); // required for proper stackwalking of RuntimeStub frame
2142 __ ret(0);
2143 }
2144
 2145     // Copy in multi-byte chunks
2146 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2147
2148 __ BIND(L_exit);
2149 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2150 restore_arg_regs();
2151 if (is_oop) {
2152 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2153 } else {
2154 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2155 }
2156 __ vzeroupper();
2157 __ xorptr(rax, rax); // return 0
2158 __ leave(); // required for proper stackwalking of RuntimeStub frame
2159 __ ret(0);
2160
2161 return start;
2162 }
2163
2164
2165 // Helper for generating a dynamic type check.
2166 // Smashes no registers.
2167 void generate_type_check(Register sub_klass,
2168 Register super_check_offset,
|
2371 #ifdef ASSERT
2372 BLOCK_COMMENT("assert consistent ckoff/ckval");
2373 // The ckoff and ckval must be mutually consistent,
2374 // even though caller generates both.
2375 { Label L;
2376 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2377 __ cmpl(ckoff, Address(ckval, sco_offset));
2378 __ jcc(Assembler::equal, L);
2379 __ stop("super_check_offset inconsistent");
2380 __ bind(L);
2381 }
2382 #endif //ASSERT
2383
2384 // Loop-invariant addresses. They are exclusive end pointers.
2385 Address end_from_addr(from, length, TIMES_OOP, 0);
2386 Address end_to_addr(to, length, TIMES_OOP, 0);
2387 // Loop-variant addresses. They assume post-incremented count < 0.
2388 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2389 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2390
2391 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2392
2393 // Copy from low to high addresses, indexed from the end of each array.
2394 __ lea(end_from, end_from_addr);
2395 __ lea(end_to, end_to_addr);
2396 __ movptr(r14_length, length); // save a copy of the length
2397 assert(length == count, ""); // else fix next line:
2398 __ negptr(count); // negate and test the length
2399 __ jcc(Assembler::notZero, L_load_element);
2400
2401 // Empty array: Nothing to do.
2402 __ xorptr(rax, rax); // return 0 on (trivial) success
2403 __ jmp(L_done);
2404
2405 // ======== begin loop ========
2406 // (Loop is rotated; its entry is L_load_element.)
2407 // Loop control:
2408 // for (count = -count; count != 0; count++)
 2409     //   Base pointers src, dst are biased by 8*(count-1), to the last element.
2410 __ align(OptoLoopAlignment);
|
2276 #ifdef ASSERT
2277 BLOCK_COMMENT("assert consistent ckoff/ckval");
2278 // The ckoff and ckval must be mutually consistent,
2279 // even though caller generates both.
2280 { Label L;
2281 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2282 __ cmpl(ckoff, Address(ckval, sco_offset));
2283 __ jcc(Assembler::equal, L);
2284 __ stop("super_check_offset inconsistent");
2285 __ bind(L);
2286 }
2287 #endif //ASSERT
2288
2289 // Loop-invariant addresses. They are exclusive end pointers.
2290 Address end_from_addr(from, length, TIMES_OOP, 0);
2291 Address end_to_addr(to, length, TIMES_OOP, 0);
2292 // Loop-variant addresses. They assume post-incremented count < 0.
2293 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2294 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2295
2296 DecoratorSet decorators = ARRAYCOPY_CHECKCAST;
2297 if (dest_uninitialized) {
2298 decorators |= AS_DEST_NOT_INITIALIZED;
2299 }
2300
2301 BasicType type = T_OBJECT;
2302 BarrierSetAssembler *bs = Universe::heap()->barrier_set()->barrier_set_assembler();
2303 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2304
2305 // Copy from low to high addresses, indexed from the end of each array.
2306 __ lea(end_from, end_from_addr);
2307 __ lea(end_to, end_to_addr);
2308 __ movptr(r14_length, length); // save a copy of the length
2309 assert(length == count, ""); // else fix next line:
2310 __ negptr(count); // negate and test the length
2311 __ jcc(Assembler::notZero, L_load_element);
2312
2313 // Empty array: Nothing to do.
2314 __ xorptr(rax, rax); // return 0 on (trivial) success
2315 __ jmp(L_done);
2316
2317 // ======== begin loop ========
2318 // (Loop is rotated; its entry is L_load_element.)
2319 // Loop control:
2320 // for (count = -count; count != 0; count++)
 2321     //   Base pointers src, dst are biased by 8*(count-1), to the last element.
2322 __ align(OptoLoopAlignment);
|
2424 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2425 // ======== end loop ========
2426
2427 // It was a real error; we must depend on the caller to finish the job.
2428 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2429 // Emit GC store barriers for the oops we have copied (r14 + rdx),
2430 // and report their number to the caller.
2431 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2432 Label L_post_barrier;
2433 __ addptr(r14_length, count); // K = (original - remaining) oops
2434 __ movptr(rax, r14_length); // save the value
2435 __ notptr(rax); // report (-1^K) to caller (does not affect flags)
2436 __ jccb(Assembler::notZero, L_post_barrier);
2437 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2438
2439 // Come here on success only.
2440 __ BIND(L_do_card_marks);
2441 __ xorptr(rax, rax); // return 0 on success
2442
2443 __ BIND(L_post_barrier);
2444 gen_write_ref_array_post_barrier(to, r14_length, rscratch1);
2445
2446 // Common exit point (success or failure).
2447 __ BIND(L_done);
2448 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2449 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2450 restore_arg_regs();
2451 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2452 __ leave(); // required for proper stackwalking of RuntimeStub frame
2453 __ ret(0);
2454
2455 return start;
2456 }
2457
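  // Editor's sketch, illustrative only: the contract of the checkcast copy
  // whose tail is shown above. Each element is type-checked before its
  // store and null is always storable; on the first failure the stub stops
  // and reports ~K, where K is the number of elements already copied, so 0
  // means total success and a caller recovers K as ~rax.
  static long long checkcast_copy_model(void** from, void** to, size_t count,
                                        bool (*can_store)(void* elem)) {
    for (size_t k = 0; k < count; k++) {
      void* e = from[k];
      if (e != NULL && !can_store(e)) {
        return ~(long long)k;     // partial copy: report (-1 ^ K)
      }
      to[k] = e;                  // models L_store_element
    }
    return 0;                     // full success
  }
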
2458 //
2459 // Generate 'unsafe' array copy stub
2460 // Though just as safe as the other stubs, it takes an unscaled
2461 // size_t argument instead of an element count.
2462 //
2463 // Input:
|
2336 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2337 // ======== end loop ========
2338
2339 // It was a real error; we must depend on the caller to finish the job.
2340 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2341 // Emit GC store barriers for the oops we have copied (r14 + rdx),
2342 // and report their number to the caller.
2343 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2344 Label L_post_barrier;
2345 __ addptr(r14_length, count); // K = (original - remaining) oops
2346 __ movptr(rax, r14_length); // save the value
2347 __ notptr(rax); // report (-1^K) to caller (does not affect flags)
2348 __ jccb(Assembler::notZero, L_post_barrier);
2349 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2350
2351 // Come here on success only.
2352 __ BIND(L_do_card_marks);
2353 __ xorptr(rax, rax); // return 0 on success
2354
2355 __ BIND(L_post_barrier);
2356 bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2357
2358 // Common exit point (success or failure).
2359 __ BIND(L_done);
2360 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2361 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2362 restore_arg_regs();
2363 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2364 __ leave(); // required for proper stackwalking of RuntimeStub frame
2365 __ ret(0);
2366
2367 return start;
2368 }
2369
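  // Editor's note: in the refactored tail above, the ARRAYCOPY_CHECKCAST
  // decorator (plus AS_DEST_NOT_INITIALIZED when applicable) now carries
  // what the old code expressed by calling the G1 and card-table helpers
  // directly, and arraycopy_epilogue receives r14_length, the number of
  // oops actually copied, so even a partial copy gets its store barriers.
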
2370 //
2371 // Generate 'unsafe' array copy stub
2372 // Though just as safe as the other stubs, it takes an unscaled
2373 // size_t argument instead of an element count.
2374 //
2375 // Input:
|