1 /*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
199 __ br(Assembler::greater, false, Assembler::pt, loop);
200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
201
202 // done
203 __ BIND(exit);
204 }
205
206 // setup parameters, method & call Java function
207 #ifdef ASSERT
208 // layout_activation_impl checks its notion of saved SP against
209 // this register, so if this changes, update it as well.
210 const Register saved_SP = Lscratch;
211 __ mov(SP, saved_SP); // keep track of SP before call
212 #endif
213
214 // setup parameters
215 const Register t = G3_scratch;
216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
218 __ sub(FP, t, Gargs); // setup parameter pointer
219 #ifdef _LP64
220 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
221 #endif
222 __ mov(SP, O5_savedSP);
223
224
225 // do the call
226 //
227 // the following registers must be set up:
228 //
229 // G2_thread
230 // G5_method
231 // Gargs
232 BLOCK_COMMENT("call Java function");
233 __ jmpl(entry_point.as_in().as_register(), G0, O7);
234 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
235
236 BLOCK_COMMENT("call_stub_return_address:");
237 return_pc = __ pc();
238
239 // The callee, if it wasn't interpreted, can return with SP changed, so
240 // we can no longer assert on the change of SP.
241
254 // store int result
255 __ st(O0, addr, G0);
256
257 __ BIND(exit);
258 __ ret();
259 __ delayed()->restore();
260
261 __ BIND(is_object);
262 __ ba(exit);
263 __ delayed()->st_ptr(O0, addr, G0);
264
265 __ BIND(is_float);
266 __ ba(exit);
267 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
268
269 __ BIND(is_double);
270 __ ba(exit);
271 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
272
273 __ BIND(is_long);
274 #ifdef _LP64
275 __ ba(exit);
276 __ delayed()->st_long(O0, addr, G0); // store entire long
277 #else
278 #if defined(COMPILER2)
279 // All return values are where we want them, except for Longs. C2 returns
280 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
281 // Since the interpreter will return longs in both G1 and O0/O1 in the 32-bit
282 // build, we simply always use G1.
283 // Note: I tried to make C2 return longs in O0/O1 and G1 so we wouldn't have to
284 // do this here. Unfortunately, if we did a rethrow we'd see a MachEpilog node
285 // first, which would move G1 -> O0/O1 and destroy the exception we were throwing.
286
287 __ ba(exit);
288 __ delayed()->stx(G1, addr, G0); // store entire long
289 #else
290 __ st(O1, addr, BytesPerInt);
291 __ ba(exit);
292 __ delayed()->st(O0, addr, G0);
293 #endif /* COMPILER2 */
294 #endif /* _LP64 */
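// To summarize the is_long variants above: the LP64 build stores the whole long from O0,
// the 32-bit COMPILER2 build stores it from G1 (for the reason given in the note above),
// and the remaining 32-bit build stores the two 32-bit halves from O1 and O0.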
295 }
296 return start;
297 }
298
299
300 //----------------------------------------------------------------------------------------------------
301 // Return point for a Java call if there's an exception thrown in Java code.
302 // The exception is caught and transformed into a pending exception stored in
303 // JavaThread that can be tested from within the VM.
304 //
305 // Oexception: exception oop
306
307 address generate_catch_exception() {
308 StubCodeMark mark(this, "StubRoutines", "catch_exception");
309
310 address start = __ pc();
311 // verify that thread corresponds
312 __ verify_thread();
313
314 const Register& temp_reg = Gtemp;
729
730 return start;
731 }
732 Label _atomic_add_stub; // called from other stubs
733
734
735 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
736 // Arguments :
737 //
738 // ret : O0, returned
739 // icc/xcc: set as O0 (depending on wordSize)
740 // sub : O1, argument, not changed
741 // super: O2, argument, not changed
742 // raddr: O7, blown by call
743 address generate_partial_subtype_check() {
744 __ align(CodeEntryAlignment);
745 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
746 address start = __ pc();
747 Label miss;
748
749 #if defined(COMPILER2) && !defined(_LP64)
750 // Do not use a 'save' because it blows the 64-bit O registers.
751 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
752 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
753 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
754 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
755 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
756 Register Rret = O0;
757 Register Rsub = O1;
758 Register Rsuper = O2;
759 #else
760 __ save_frame(0);
761 Register Rret = I0;
762 Register Rsub = I1;
763 Register Rsuper = I2;
764 #endif
765
766 Register L0_ary_len = L0;
767 Register L1_ary_ptr = L1;
768 Register L2_super = L2;
769 Register L3_index = L3;
770
771 __ check_klass_subtype_slow_path(Rsub, Rsuper,
772 L0, L1, L2, L3,
773 NULL, &miss);
774
775 // Match falls through here.
776 __ addcc(G0,0,Rret); // set Z flags, Z result
777
778 #if defined(COMPILER2) && !defined(_LP64)
779 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
780 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
781 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
782 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
783 __ retl(); // Result in Rret is zero; flags set to Z
784 __ delayed()->add(SP,4*wordSize,SP);
785 #else
786 __ ret(); // Result in Rret is zero; flags set to Z
787 __ delayed()->restore();
788 #endif
789
790 __ BIND(miss);
791 __ addcc(G0,1,Rret); // set NZ flags, NZ result
792
793 #if defined(COMPILER2) && !defined(_LP64)
794 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
795 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
796 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
797 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
798 __ retl(); // Result in Rret is != 0; flags set to NZ
799 __ delayed()->add(SP,4*wordSize,SP);
800 #else
801 __ ret(); // Result in Rret is != 0; flags set to NZ
802 __ delayed()->restore();
803 #endif
804
805 return start;
806 }
807
808
809 // Called from MacroAssembler::verify_oop
810 //
811 address generate_verify_oop_subroutine() {
812 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
813
814 address start = __ pc();
815
816 __ verify_oop_subroutine();
817
818 return start;
819 }
820
821
822 //
823 // Verify that a register contains a clean 32-bit positive value
824 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
825 //
826 // Input:
827 // Rint - 32-bits value
828 // Rtmp - scratch
829 //
830 void assert_clean_int(Register Rint, Register Rtmp) {
831 #if defined(ASSERT) && defined(_LP64)
832 __ signx(Rint, Rtmp);
833 __ cmp(Rint, Rtmp);
834 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
835 #endif
836 }
837
838 //
839 // Generate overlap test for array copy stubs
840 //
841 // Input:
842 // O0 - array1
843 // O1 - array2
844 // O2 - element count
845 //
846 // Kills temps: O3, O4
847 //
848 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
849 assert(no_overlap_target != NULL, "must be generated");
850 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
851 }
852 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
853 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
854 }
855 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1252
1253 if (entry != NULL) {
1254 *entry = __ pc();
1255 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1256 BLOCK_COMMENT("Entry:");
1257 }
1258
1259 // for short arrays, just do single element copy
1260 __ cmp(count, 23); // 16 + 7
1261 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1262 __ delayed()->mov(G0, offset);
1263
1264 if (aligned) {
1265 // 'aligned' == true when it is known statically during compilation
1266 // of this arraycopy call site that both 'from' and 'to' addresses
1267 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1268 //
1269 // Aligned arrays have 4-byte alignment in the 32-bit VM
1270 // and 8-byte alignment in the 64-bit VM, so we do this only for the 32-bit VM
1271 //
1272 #ifndef _LP64
1273 // copy a 4-byte word if necessary to align 'to' to 8 bytes
1274 __ andcc(to, 7, G0);
1275 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
1276 __ delayed()->ld(from, 0, O3);
1277 __ inc(from, 4);
1278 __ inc(to, 4);
1279 __ dec(count, 4);
1280 __ st(O3, to, -4);
1281 __ BIND(L_skip_alignment);
1282 #endif
1283 } else {
1284 // copy bytes to align 'to' on 8 byte boundary
1285 __ andcc(to, 7, G1); // misaligned bytes
1286 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1287 __ delayed()->neg(G1);
1288 __ inc(G1, 8); // bytes needed to copy to reach the next 8-byte alignment
1289 __ sub(count, G1, count);
1290 __ BIND(L_align);
1291 __ ldub(from, 0, O3);
1292 __ deccc(G1);
1293 __ inc(from);
1294 __ stb(O3, to, 0);
1295 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1296 __ delayed()->inc(to);
1297 __ BIND(L_skip_alignment);
1298 }
1299 #ifdef _LP64
1300 if (!aligned)
1301 #endif
1302 {
1303 // Copy with shift 16 bytes per iteration if arrays do not have
1304 // the same alignment mod 8, otherwise fall through to the next
1305 // code for aligned copy.
1306 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1307 // Also jump over the aligned copy after the copy with shift has completed.
1308
1309 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1310 }
1311
1312 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1313 __ and3(count, 7, G4); // Save count
1314 __ srl(count, 3, count);
1315 generate_disjoint_long_copy_core(aligned);
1316 __ mov(G4, count); // Restore count
1317
1318 // copy trailing bytes
1319 __ BIND(L_copy_byte);
1320 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1321 __ align(OptoLoopAlignment);
1378 __ delayed()->add(from, count, end_from);
1379
1380 {
1381 // Align the ends of the arrays since they may not be aligned even
1382 // when the arrays themselves are aligned.
1383
1384 // copy bytes to align 'end_to' on 8 byte boundary
1385 __ andcc(end_to, 7, G1); // misaligned bytes
1386 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1387 __ delayed()->nop();
1388 __ sub(count, G1, count);
1389 __ BIND(L_align);
1390 __ dec(end_from);
1391 __ dec(end_to);
1392 __ ldub(end_from, 0, O3);
1393 __ deccc(G1);
1394 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1395 __ delayed()->stb(O3, end_to, 0);
1396 __ BIND(L_skip_alignment);
1397 }
1398 #ifdef _LP64
1399 if (aligned) {
1400 // Both arrays are aligned to 8-bytes in 64-bits VM.
1401 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1402 // in unaligned case.
1403 __ dec(count, 16);
1404 } else
1405 #endif
1406 {
1407 // Copy with shift 16 bytes per iteration if arrays do not have
1408 // the same alignment mod 8, otherwise jump to the next
1409 // code for aligned copy (subtracting 16 from 'count' before the jump).
1410 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1411 // Also jump over the aligned copy after the copy with shift has completed.
1412
1413 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1414 L_aligned_copy, L_copy_byte);
1415 }
1416 // copy 4 elements (16 bytes) at a time
1417 __ align(OptoLoopAlignment);
1418 __ BIND(L_aligned_copy);
1419 __ dec(end_from, 16);
1420 __ ldx(end_from, 8, O3);
1421 __ ldx(end_from, 0, O4);
1422 __ dec(end_to, 16);
1423 __ deccc(count, 16);
1424 __ stx(O3, end_to, 8);
1425 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1473
1474 if (entry != NULL) {
1475 *entry = __ pc();
1476 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1477 BLOCK_COMMENT("Entry:");
1478 }
1479
1480 // for short arrays, just do single element copy
1481 __ cmp(count, 11); // 8 + 3 (22 bytes)
1482 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1483 __ delayed()->mov(G0, offset);
1484
1485 if (aligned) {
1486 // 'aligned' == true when it is known statically during compilation
1487 // of this arraycopy call site that both 'from' and 'to' addresses
1488 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1489 //
1490 // Aligned arrays have 4-byte alignment in the 32-bit VM
1491 // and 8-byte alignment in the 64-bit VM.
1492 //
1493 #ifndef _LP64
1494 // copy a 2-element word if necessary to align 'to' to 8 bytes
1495 __ andcc(to, 7, G0);
1496 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1497 __ delayed()->ld(from, 0, O3);
1498 __ inc(from, 4);
1499 __ inc(to, 4);
1500 __ dec(count, 2);
1501 __ st(O3, to, -4);
1502 __ BIND(L_skip_alignment);
1503 #endif
1504 } else {
1505 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1506 __ andcc(to, 3, G0);
1507 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1508 __ delayed()->lduh(from, 0, O3);
1509 __ inc(from, 2);
1510 __ inc(to, 2);
1511 __ dec(count);
1512 __ sth(O3, to, -2);
1513 __ BIND(L_skip_alignment);
1514
1515 // copy 2 elements to align 'to' on an 8 byte boundary
1516 __ andcc(to, 7, G0);
1517 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1518 __ delayed()->lduh(from, 0, O3);
1519 __ dec(count, 2);
1520 __ lduh(from, 2, O4);
1521 __ inc(from, 4);
1522 __ inc(to, 4);
1523 __ sth(O3, to, -4);
1524 __ sth(O4, to, -2);
1525 __ BIND(L_skip_alignment2);
1526 }
1527 #ifdef _LP64
1528 if (!aligned)
1529 #endif
1530 {
1531 // Copy with shift 16 bytes per iteration if arrays do not have
1532 // the same alignment mod 8, otherwise fall through to the next
1533 // code for aligned copy.
1534 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1535 // Also jump over the aligned copy after the copy with shift has completed.
1536
1537 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1538 }
1539
1540 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1541 __ and3(count, 3, G4); // Save
1542 __ srl(count, 2, count);
1543 generate_disjoint_long_copy_core(aligned);
1544 __ mov(G4, count); // restore
1545
1546 // copy 1 element at a time
1547 __ BIND(L_copy_2_bytes);
1548 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1549 __ align(OptoLoopAlignment);
1626 // align source address at 4 bytes address boundary
1627 if (t == T_BYTE) {
1628 // One byte misalignment happens only for byte arrays
1629 __ andcc(to, 1, G0);
1630 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1631 __ delayed()->nop();
1632 __ stb(value, to, 0);
1633 __ inc(to, 1);
1634 __ dec(count, 1);
1635 __ BIND(L_skip_align1);
1636 }
1637 // Two bytes misalignment happens only for byte and short (char) arrays
1638 __ andcc(to, 2, G0);
1639 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1640 __ delayed()->nop();
1641 __ sth(value, to, 0);
1642 __ inc(to, 2);
1643 __ dec(count, 1 << (shift - 1));
1644 __ BIND(L_skip_align2);
1645 }
1646 #ifdef _LP64
1647 if (!aligned) {
1648 #endif
1649 // align to 8 bytes, we know we are 4 byte aligned to start
1650 __ andcc(to, 7, G0);
1651 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1652 __ delayed()->nop();
1653 __ stw(value, to, 0);
1654 __ inc(to, 4);
1655 __ dec(count, 1 << shift);
1656 __ BIND(L_fill_32_bytes);
1657 #ifdef _LP64
1658 }
1659 #endif
1660
1661 if (t == T_INT) {
1662 // Zero extend value
1663 __ srl(value, 0, value);
1664 }
1665 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1666 __ sllx(value, 32, O3);
1667 __ or3(value, O3, value);
1668 }
1669
1670 Label L_check_fill_8_bytes;
1671 // Fill 32-byte chunks
1672 __ subcc(count, 8 << shift, count);
1673 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1674 __ delayed()->nop();
1675
1676 Label L_fill_32_bytes_loop, L_fill_4_bytes;
1677 __ align(16);
1678 __ BIND(L_fill_32_bytes_loop);
1679
1840 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1841 __ delayed()->lduh(end_from, -2, O3);
1842 __ dec(end_from, 2);
1843 __ dec(end_to, 2);
1844 __ dec(count);
1845 __ sth(O3, end_to, 0);
1846 __ BIND(L_skip_alignment);
1847
1848 // copy 2 elements to align 'end_to' on an 8 byte boundary
1849 __ andcc(end_to, 7, G0);
1850 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1851 __ delayed()->lduh(end_from, -2, O3);
1852 __ dec(count, 2);
1853 __ lduh(end_from, -4, O4);
1854 __ dec(end_from, 4);
1855 __ dec(end_to, 4);
1856 __ sth(O3, end_to, 2);
1857 __ sth(O4, end_to, 0);
1858 __ BIND(L_skip_alignment2);
1859 }
1860 #ifdef _LP64
1861 if (aligned) {
1862 // Both arrays are aligned to 8-bytes in 64-bits VM.
1863 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1864 // in unaligned case.
1865 __ dec(count, 8);
1866 } else
1867 #endif
1868 {
1869 // Copy with shift 16 bytes per iteration if arrays do not have
1870 // the same alignment mod 8, otherwise jump to the next
1871 // code for aligned copy (subtracting 8 from 'count' before the jump).
1872 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1873 // Also jump over the aligned copy after the copy with shift has completed.
1874
1875 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1876 L_aligned_copy, L_copy_2_bytes);
1877 }
1878 // copy 4 elements (16 bytes) at a time
1879 __ align(OptoLoopAlignment);
1880 __ BIND(L_aligned_copy);
1881 __ dec(end_from, 16);
1882 __ ldx(end_from, 8, O3);
1883 __ ldx(end_from, 0, O4);
1884 __ dec(end_to, 16);
1885 __ deccc(count, 8);
1886 __ stx(O3, end_to, 8);
1887 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1957 // count: O2 treated as signed
1958 //
1959 void generate_disjoint_int_copy_core(bool aligned) {
1960
1961 Label L_skip_alignment, L_aligned_copy;
1962 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1963
1964 const Register from = O0; // source array address
1965 const Register to = O1; // destination array address
1966 const Register count = O2; // elements count
1967 const Register offset = O5; // offset from start of arrays
1968 // O3, O4, G3, G4 are used as temp registers
1969
1970 // 'aligned' == true when it is known statically during compilation
1971 // of this arraycopy call site that both 'from' and 'to' addresses
1972 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1973 //
1974 // Aligned arrays have 4-byte alignment in the 32-bit VM
1975 // and 8-byte alignment in the 64-bit VM.
1976 //
1977 #ifdef _LP64
1978 if (!aligned)
1979 #endif
1980 {
1981 // The next check could be put under 'ifndef' since the code in
1982 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1983
1984 // for short arrays, just do single element copy
1985 __ cmp(count, 5); // 4 + 1 (20 bytes)
1986 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1987 __ delayed()->mov(G0, offset);
1988
1989 // copy 1 element to align 'to' on an 8 byte boundary
1990 __ andcc(to, 7, G0);
1991 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1992 __ delayed()->ld(from, 0, O3);
1993 __ inc(from, 4);
1994 __ inc(to, 4);
1995 __ dec(count);
1996 __ st(O3, to, -4);
1997 __ BIND(L_skip_alignment);
1998
1999 // if arrays have the same alignment mod 8, do a 4-element copy
2446 const Register from = O0; // source array address
2447 const Register to = O1; // destination array address
2448 const Register count = O2; // elements count
2449
2450 __ align(CodeEntryAlignment);
2451 StubCodeMark mark(this, "StubRoutines", name);
2452 address start = __ pc();
2453
2454 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2455
2456 if (entry != NULL) {
2457 *entry = __ pc();
2458 // caller can pass a 64-bit byte count here
2459 BLOCK_COMMENT("Entry:");
2460 }
2461
2462 // save arguments for barrier generation
2463 __ mov(to, G1);
2464 __ mov(count, G5);
2465 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2466 #ifdef _LP64
2467 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2468 if (UseCompressedOops) {
2469 generate_disjoint_int_copy_core(aligned);
2470 } else {
2471 generate_disjoint_long_copy_core(aligned);
2472 }
2473 #else
2474 generate_disjoint_int_copy_core(aligned);
2475 #endif
2476 // O0 is used as temp register
2477 gen_write_ref_array_post_barrier(G1, G5, O0);
2478
2479 // O3, O4 are used as temp registers
2480 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2481 __ retl();
2482 __ delayed()->mov(G0, O0); // return 0
2483 return start;
2484 }
2485
2486 // Generate stub for conjoint oop copy. If "aligned" is true, the
2487 // "from" and "to" addresses are assumed to be heapword aligned.
2488 //
2489 // Arguments for generated stub:
2490 // from: O0
2491 // to: O1
2492 // count: O2 treated as signed
2493 //
2494 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2495 address *entry, const char *name,
2501
2502 __ align(CodeEntryAlignment);
2503 StubCodeMark mark(this, "StubRoutines", name);
2504 address start = __ pc();
2505
2506 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2507
2508 if (entry != NULL) {
2509 *entry = __ pc();
2510 // caller can pass a 64-bit byte count here
2511 BLOCK_COMMENT("Entry:");
2512 }
2513
2514 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2515
2516 // save arguments for barrier generation
2517 __ mov(to, G1);
2518 __ mov(count, G5);
2519 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2520
2521 #ifdef _LP64
2522 if (UseCompressedOops) {
2523 generate_conjoint_int_copy_core(aligned);
2524 } else {
2525 generate_conjoint_long_copy_core(aligned);
2526 }
2527 #else
2528 generate_conjoint_int_copy_core(aligned);
2529 #endif
2530
2531 // O0 is used as temp register
2532 gen_write_ref_array_post_barrier(G1, G5, O0);
2533
2534 // O3, O4 are used as temp registers
2535 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2536 __ retl();
2537 __ delayed()->mov(G0, O0); // return 0
2538 return start;
2539 }
2540
2541
2542 // Helper for generating a dynamic type check.
2543 // Smashes only the given temp registers.
2544 void generate_type_check(Register sub_klass,
2545 Register super_check_offset,
2546 Register super_klass,
2547 Register temp,
2548 Label& L_success) {
2549 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
3121 "arrayof_jbyte_arraycopy");
3122
3123 //*** jshort
3124 // Always need aligned and unaligned versions
3125 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3126 "jshort_disjoint_arraycopy");
3127 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
3128 &entry_jshort_arraycopy,
3129 "jshort_arraycopy");
3130 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3131 "arrayof_jshort_disjoint_arraycopy");
3132 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3133 "arrayof_jshort_arraycopy");
3134
3135 //*** jint
3136 // Aligned versions
3137 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3138 "arrayof_jint_disjoint_arraycopy");
3139 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3140 "arrayof_jint_arraycopy");
3141 #ifdef _LP64
3142 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3143 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3144 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3145 "jint_disjoint_arraycopy");
3146 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
3147 &entry_jint_arraycopy,
3148 "jint_arraycopy");
3149 #else
3150 // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
3151 // (in fact in 32bit we always have a pre-loop part even in the aligned version,
3152 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
3153 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
3154 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy;
3155 #endif
3156
3157
3158 //*** jlong
3159 // It is always aligned
3160 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3161 "arrayof_jlong_disjoint_arraycopy");
3162 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3163 "arrayof_jlong_arraycopy");
3164 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3165 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
3166
3167
3168 //*** oops
3169 // Aligned versions
3170 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
3171 "arrayof_oop_disjoint_arraycopy");
3172 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3173 "arrayof_oop_arraycopy");
3174 // Aligned versions without pre-barriers
3175 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3176 "arrayof_oop_disjoint_arraycopy_uninit",
3177 /*dest_uninitialized*/true);
3178 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
3179 "arrayof_oop_arraycopy_uninit",
3180 /*dest_uninitialized*/true);
3181 #ifdef _LP64
3182 if (UseCompressedOops) {
3183 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
3184 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3185 "oop_disjoint_arraycopy");
3186 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3187 "oop_arraycopy");
3188 // Unaligned versions without pre-barriers
3189 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
3190 "oop_disjoint_arraycopy_uninit",
3191 /*dest_uninitialized*/true);
3192 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
3193 "oop_arraycopy_uninit",
3194 /*dest_uninitialized*/true);
3195 } else
3196 #endif
3197 {
3198 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3199 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3200 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
3201 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3202 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
3203 }
3204
3205 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3206 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3207 /*dest_uninitialized*/true);
3208
3209 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3210 entry_jbyte_arraycopy,
3211 entry_jshort_arraycopy,
3212 entry_jint_arraycopy,
3213 entry_jlong_arraycopy);
3214 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3215 entry_jbyte_arraycopy,
3216 entry_jshort_arraycopy,
5087
5088 void generate_initial() {
5089 // Generates all stubs and initializes the entry points
5090
5091 //------------------------------------------------------------------------------------------------------------------------
5092 // entry points that exist in all platforms
5093 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
5094 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
5095 StubRoutines::_forward_exception_entry = generate_forward_exception();
5096
5097 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
5098 StubRoutines::_catch_exception_entry = generate_catch_exception();
5099
5100 //------------------------------------------------------------------------------------------------------------------------
5101 // entry points that are platform specific
5102 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
5103
5104 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
5105 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
5106
5107 #if !defined(COMPILER2) && !defined(_LP64)
5108 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
5109 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
5110 StubRoutines::_atomic_add_entry = generate_atomic_add();
5111 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
5112 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
5113 StubRoutines::_atomic_cmpxchg_byte_entry = ShouldNotCallThisStub();
5114 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
5115 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
5116 #endif // COMPILER2 !=> _LP64
5117
5118 // Build this early so it's available for the interpreter.
5119 StubRoutines::_throw_StackOverflowError_entry =
5120 generate_throw_exception("StackOverflowError throw_exception",
5121 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5122 StubRoutines::_throw_delayed_StackOverflowError_entry =
5123 generate_throw_exception("delayed StackOverflowError throw_exception",
5124 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5125
5126 if (UseCRC32Intrinsics) {
5127 // set table address before stub generation, which uses it
5128 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5129 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5130 }
5131
5132 if (UseCRC32CIntrinsics) {
5133 // set table address before stub generation, which uses it
5134 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5135 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5136 }
5137 }
5205 _stub_count = !all ? 0x100 : 0x200;
5206 if (all) {
5207 generate_all();
5208 } else {
5209 generate_initial();
5210 }
5211
5212 // make sure this stub is available for all local calls
5213 if (_atomic_add_stub.is_unbound()) {
5214 // generate a second time, if necessary
5215 (void) generate_atomic_add();
5216 }
5217 }
5218
5219
5220 private:
5221 int _stub_count;
5222 void stub_prolog(StubCodeDesc* cdesc) {
5223 # ifdef ASSERT
5224 // put extra information in the stub code, to make it more readable
5225 #ifdef _LP64
5226 // Write the high part of the address
5227 // [RGV] Check if there is a dependency on the size of this prolog
5228 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
5229 #endif
5230 __ emit_data((intptr_t)cdesc, relocInfo::none);
5231 __ emit_data(++_stub_count, relocInfo::none);
5232 # endif
5233 align(true);
5234 }
5235
5236 void align(bool at_header = false) {
5237 // %%%%% move this constant somewhere else
5238 // UltraSPARC cache line size is 8 instructions:
5239 const unsigned int icache_line_size = 32;
5240 const unsigned int icache_half_line_size = 16;
5241
5242 if (at_header) {
5243 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5244 __ emit_data(0, relocInfo::none);
5245 }
5246 } else {
5247 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5248 __ nop();
5249 }
|
1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
199 __ br(Assembler::greater, false, Assembler::pt, loop);
200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
201
202 // done
203 __ BIND(exit);
204 }
205
206 // setup parameters, method & call Java function
207 #ifdef ASSERT
208 // layout_activation_impl checks its notion of saved SP against
209 // this register, so if this changes, update it as well.
210 const Register saved_SP = Lscratch;
211 __ mov(SP, saved_SP); // keep track of SP before call
212 #endif
213
214 // setup parameters
215 const Register t = G3_scratch;
216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
218 __ sub(FP, t, Gargs); // setup parameter pointer
219 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
220 __ mov(SP, O5_savedSP);
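// Worked example (hypothetical values): with a parameter_size of 3 words and an 8-byte
// stack element (Interpreter::logStackElementSize == 3), t becomes 3 << 3 == 24 bytes,
// so Gargs = FP - 24 + STACK_BIAS, i.e. the outgoing parameter area sits just below FP.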
221
222
223 // do the call
224 //
225 // the following registers must be set up:
226 //
227 // G2_thread
228 // G5_method
229 // Gargs
230 BLOCK_COMMENT("call Java function");
231 __ jmpl(entry_point.as_in().as_register(), G0, O7);
232 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
233
234 BLOCK_COMMENT("call_stub_return_address:");
235 return_pc = __ pc();
236
237 // The callee, if it wasn't interpreted, can return with SP changed, so
238 // we can no longer assert on the change of SP.
239
252 // store int result
253 __ st(O0, addr, G0);
254
255 __ BIND(exit);
256 __ ret();
257 __ delayed()->restore();
258
259 __ BIND(is_object);
260 __ ba(exit);
261 __ delayed()->st_ptr(O0, addr, G0);
262
263 __ BIND(is_float);
264 __ ba(exit);
265 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
266
267 __ BIND(is_double);
268 __ ba(exit);
269 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
270
271 __ BIND(is_long);
272 __ ba(exit);
273 __ delayed()->st_long(O0, addr, G0); // store entire long
274 }
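// Note on the result handling above: each non-int result type branches back to 'exit'
// with its store folded into the branch delay slot, while an int result is written by
// the plain store just before the 'exit' label.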
275 return start;
276 }
277
278
279 //----------------------------------------------------------------------------------------------------
280 // Return point for a Java call if there's an exception thrown in Java code.
281 // The exception is caught and transformed into a pending exception stored in
282 // JavaThread that can be tested from within the VM.
283 //
284 // Oexception: exception oop
285
286 address generate_catch_exception() {
287 StubCodeMark mark(this, "StubRoutines", "catch_exception");
288
289 address start = __ pc();
290 // verify that thread corresponds
291 __ verify_thread();
292
293 const Register& temp_reg = Gtemp;
708
709 return start;
710 }
711 Label _atomic_add_stub; // called from other stubs
712
713
714 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
715 // Arguments :
716 //
717 // ret : O0, returned
718 // icc/xcc: set as O0 (depending on wordSize)
719 // sub : O1, argument, not changed
720 // super: O2, argument, not changed
721 // raddr: O7, blown by call
722 address generate_partial_subtype_check() {
723 __ align(CodeEntryAlignment);
724 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
725 address start = __ pc();
726 Label miss;
727
728 __ save_frame(0);
729 Register Rret = I0;
730 Register Rsub = I1;
731 Register Rsuper = I2;
732
733 Register L0_ary_len = L0;
734 Register L1_ary_ptr = L1;
735 Register L2_super = L2;
736 Register L3_index = L3;
737
738 __ check_klass_subtype_slow_path(Rsub, Rsuper,
739 L0, L1, L2, L3,
740 NULL, &miss);
741
742 // Match falls through here.
743 __ addcc(G0,0,Rret); // set Z flags, Z result
744
745 __ ret(); // Result in Rret is zero; flags set to Z
746 __ delayed()->restore();
747
748 __ BIND(miss);
749 __ addcc(G0,1,Rret); // set NZ flags, NZ result
750
751 __ ret(); // Result in Rret is != 0; flags set to NZ
752 __ delayed()->restore();
753
754 return start;
755 }
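// A caller of this stub can test the outcome either from the condition codes set above
// or by comparing O0 with zero: a zero result (Z) means the partial subtype check hit,
// a non-zero result (NZ) means a miss.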
756
757
758 // Called from MacroAssembler::verify_oop
759 //
760 address generate_verify_oop_subroutine() {
761 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
762
763 address start = __ pc();
764
765 __ verify_oop_subroutine();
766
767 return start;
768 }
769
770
771 //
772 // Verify that a register contains a clean 32-bit positive value
773 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
774 //
775 // Input:
776 // Rint - 32-bits value
777 // Rtmp - scratch
778 //
779 void assert_clean_int(Register Rint, Register Rtmp) {
780 #if defined(ASSERT)
781 __ signx(Rint, Rtmp);
782 __ cmp(Rint, Rtmp);
783 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
784 #endif
785 }
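// Example: a register holding 0x0000000000000005 passes (sign-extending its low 32 bits
// reproduces the value), while 0x0000000100000005 would take the breakpoint trap because
// its upper 32 bits are not clean.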
786
787 //
788 // Generate overlap test for array copy stubs
789 //
790 // Input:
791 // O0 - array1
792 // O1 - array2
793 // O2 - element count
794 //
795 // Kills temps: O3, O4
796 //
797 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
798 assert(no_overlap_target != NULL, "must be generated");
799 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
800 }
801 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
802 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
803 }
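// Both wrappers above forward to the three-argument worker below, which branches to the
// no-overlap code (given either as an external address or as a local Label, typically
// the disjoint copy path) when a forward copy is safe.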
804 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1201
1202 if (entry != NULL) {
1203 *entry = __ pc();
1204 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1205 BLOCK_COMMENT("Entry:");
1206 }
1207
1208 // for short arrays, just do single element copy
1209 __ cmp(count, 23); // 16 + 7
1210 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1211 __ delayed()->mov(G0, offset);
1212
1213 if (aligned) {
1214 // 'aligned' == true when it is known statically during compilation
1215 // of this arraycopy call site that both 'from' and 'to' addresses
1216 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1217 //
1218 // Aligned arrays have 4-byte alignment in the 32-bit VM
1219 // and 8-byte alignment in the 64-bit VM, so we do this only for the 32-bit VM
1220 //
1221 } else {
1222 // copy bytes to align 'to' on 8 byte boundary
1223 __ andcc(to, 7, G1); // misaligned bytes
1224 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1225 __ delayed()->neg(G1);
1226 __ inc(G1, 8); // bytes needed to copy to reach the next 8-byte alignment
1227 __ sub(count, G1, count);
1228 __ BIND(L_align);
1229 __ ldub(from, 0, O3);
1230 __ deccc(G1);
1231 __ inc(from);
1232 __ stb(O3, to, 0);
1233 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1234 __ delayed()->inc(to);
1235 __ BIND(L_skip_alignment);
1236 }
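// Example: if 'to' ends in ...5, (to & 7) == 5, so G1 == 3 and the loop above copies
// three single bytes before the bulk copy resumes on an 8-byte boundary.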
1237 if (!aligned)
1238 {
1239 // Copy with shift 16 bytes per iteration if arrays do not have
1240 // the same alignment mod 8, otherwise fall through to the next
1241 // code for aligned copy.
1242 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1243 // Also jump over the aligned copy after the copy with shift has completed.
1244
1245 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1246 }
1247
1248 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1249 __ and3(count, 7, G4); // Save count
1250 __ srl(count, 3, count);
1251 generate_disjoint_long_copy_core(aligned);
1252 __ mov(G4, count); // Restore count
1253
1254 // copy trailing bytes
1255 __ BIND(L_copy_byte);
1256 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1257 __ align(OptoLoopAlignment);
1314 __ delayed()->add(from, count, end_from);
1315
1316 {
1317 // Align the ends of the arrays since they may not be aligned even
1318 // when the arrays themselves are aligned.
1319
1320 // copy bytes to align 'end_to' on 8 byte boundary
1321 __ andcc(end_to, 7, G1); // misaligned bytes
1322 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1323 __ delayed()->nop();
1324 __ sub(count, G1, count);
1325 __ BIND(L_align);
1326 __ dec(end_from);
1327 __ dec(end_to);
1328 __ ldub(end_from, 0, O3);
1329 __ deccc(G1);
1330 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1331 __ delayed()->stb(O3, end_to, 0);
1332 __ BIND(L_skip_alignment);
1333 }
1334 if (aligned) {
1335 // Both arrays are aligned to 8-bytes in 64-bits VM.
1336 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1337 // in unaligned case.
1338 __ dec(count, 16);
1339 } else
1340 {
1341 // Copy with shift 16 bytes per iteration if arrays do not have
1342 // the same alignment mod 8, otherwise jump to the next
1343 // code for aligned copy (subtracting 16 from 'count' before the jump).
1344 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1345 // Also jump over the aligned copy after the copy with shift has completed.
1346
1347 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1348 L_aligned_copy, L_copy_byte);
1349 }
1350 // copy 4 elements (16 bytes) at a time
1351 __ align(OptoLoopAlignment);
1352 __ BIND(L_aligned_copy);
1353 __ dec(end_from, 16);
1354 __ ldx(end_from, 8, O3);
1355 __ ldx(end_from, 0, O4);
1356 __ dec(end_to, 16);
1357 __ deccc(count, 16);
1358 __ stx(O3, end_to, 8);
1359 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1407
1408 if (entry != NULL) {
1409 *entry = __ pc();
1410 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1411 BLOCK_COMMENT("Entry:");
1412 }
1413
1414 // for short arrays, just do single element copy
1415 __ cmp(count, 11); // 8 + 3 (22 bytes)
1416 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1417 __ delayed()->mov(G0, offset);
1418
1419 if (aligned) {
1420 // 'aligned' == true when it is known statically during compilation
1421 // of this arraycopy call site that both 'from' and 'to' addresses
1422 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1423 //
1424 // Aligned arrays have 4-byte alignment in the 32-bit VM
1425 // and 8-byte alignment in the 64-bit VM.
1426 //
1427 } else {
1428 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1429 __ andcc(to, 3, G0);
1430 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1431 __ delayed()->lduh(from, 0, O3);
1432 __ inc(from, 2);
1433 __ inc(to, 2);
1434 __ dec(count);
1435 __ sth(O3, to, -2);
1436 __ BIND(L_skip_alignment);
1437
1438 // copy 2 elements to align 'to' on an 8 byte boundary
1439 __ andcc(to, 7, G0);
1440 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1441 __ delayed()->lduh(from, 0, O3);
1442 __ dec(count, 2);
1443 __ lduh(from, 2, O4);
1444 __ inc(from, 4);
1445 __ inc(to, 4);
1446 __ sth(O3, to, -4);
1447 __ sth(O4, to, -2);
1448 __ BIND(L_skip_alignment2);
1449 }
1450 if (!aligned)
1451 {
1452 // Copy with shift 16 bytes per iteration if arrays do not have
1453 // the same alignment mod 8, otherwise fall through to the next
1454 // code for aligned copy.
1455 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1456 // Also jump over the aligned copy after the copy with shift has completed.
1457
1458 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1459 }
1460
1461 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1462 __ and3(count, 3, G4); // Save
1463 __ srl(count, 2, count);
1464 generate_disjoint_long_copy_core(aligned);
1465 __ mov(G4, count); // restore
1466
1467 // copy 1 element at a time
1468 __ BIND(L_copy_2_bytes);
1469 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1470 __ align(OptoLoopAlignment);
1547 // align source address at 4 bytes address boundary
1548 if (t == T_BYTE) {
1549 // One byte misalignment happens only for byte arrays
1550 __ andcc(to, 1, G0);
1551 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1552 __ delayed()->nop();
1553 __ stb(value, to, 0);
1554 __ inc(to, 1);
1555 __ dec(count, 1);
1556 __ BIND(L_skip_align1);
1557 }
1558 // Two bytes misalignment happens only for byte and short (char) arrays
1559 __ andcc(to, 2, G0);
1560 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1561 __ delayed()->nop();
1562 __ sth(value, to, 0);
1563 __ inc(to, 2);
1564 __ dec(count, 1 << (shift - 1));
1565 __ BIND(L_skip_align2);
1566 }
1567 if (!aligned) {
1568 // align to 8 bytes, we know we are 4 byte aligned to start
1569 __ andcc(to, 7, G0);
1570 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1571 __ delayed()->nop();
1572 __ stw(value, to, 0);
1573 __ inc(to, 4);
1574 __ dec(count, 1 << shift);
1575 __ BIND(L_fill_32_bytes);
1576 }
1577
1578 if (t == T_INT) {
1579 // Zero extend value
1580 __ srl(value, 0, value);
1581 }
1582 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1583 __ sllx(value, 32, O3);
1584 __ or3(value, O3, value);
1585 }
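// Example: a 32-bit fill value of 0xCAFEBABE becomes 0xCAFEBABECAFEBABE here, so each
// 64-bit store below writes two int-sized copies of the pattern.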
1586
1587 Label L_check_fill_8_bytes;
1588 // Fill 32-byte chunks
1589 __ subcc(count, 8 << shift, count);
1590 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1591 __ delayed()->nop();
1592
1593 Label L_fill_32_bytes_loop, L_fill_4_bytes;
1594 __ align(16);
1595 __ BIND(L_fill_32_bytes_loop);
1596
1757 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1758 __ delayed()->lduh(end_from, -2, O3);
1759 __ dec(end_from, 2);
1760 __ dec(end_to, 2);
1761 __ dec(count);
1762 __ sth(O3, end_to, 0);
1763 __ BIND(L_skip_alignment);
1764
1765 // copy 2 elements to align 'end_to' on an 8 byte boundary
1766 __ andcc(end_to, 7, G0);
1767 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1768 __ delayed()->lduh(end_from, -2, O3);
1769 __ dec(count, 2);
1770 __ lduh(end_from, -4, O4);
1771 __ dec(end_from, 4);
1772 __ dec(end_to, 4);
1773 __ sth(O3, end_to, 2);
1774 __ sth(O4, end_to, 0);
1775 __ BIND(L_skip_alignment2);
1776 }
1777 if (aligned) {
1778 // Both arrays are aligned to 8-bytes in 64-bits VM.
1779 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1780 // in unaligned case.
1781 __ dec(count, 8);
1782 } else
1783 {
1784 // Copy with shift 16 bytes per iteration if arrays do not have
1785 // the same alignment mod 8, otherwise jump to the next
1786 // code for aligned copy (subtracting 8 from 'count' before the jump).
1787 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1788 // Also jump over the aligned copy after the copy with shift has completed.
1789
1790 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1791 L_aligned_copy, L_copy_2_bytes);
1792 }
1793 // copy 4 elements (16 bytes) at a time
1794 __ align(OptoLoopAlignment);
1795 __ BIND(L_aligned_copy);
1796 __ dec(end_from, 16);
1797 __ ldx(end_from, 8, O3);
1798 __ ldx(end_from, 0, O4);
1799 __ dec(end_to, 16);
1800 __ deccc(count, 8);
1801 __ stx(O3, end_to, 8);
1802 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1872 // count: O2 treated as signed
1873 //
1874 void generate_disjoint_int_copy_core(bool aligned) {
1875
1876 Label L_skip_alignment, L_aligned_copy;
1877 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1878
1879 const Register from = O0; // source array address
1880 const Register to = O1; // destination array address
1881 const Register count = O2; // elements count
1882 const Register offset = O5; // offset from start of arrays
1883 // O3, O4, G3, G4 are used as temp registers
1884
1885 // 'aligned' == true when it is known statically during compilation
1886 // of this arraycopy call site that both 'from' and 'to' addresses
1887 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1888 //
1889 // Aligned arrays have 4-byte alignment in the 32-bit VM
1890 // and 8-byte alignment in the 64-bit VM.
1891 //
1892 if (!aligned)
1893 {
1894 // The next check could be put under 'ifndef' since the code in
1895 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1896
1897 // for short arrays, just do single element copy
1898 __ cmp(count, 5); // 4 + 1 (20 bytes)
1899 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1900 __ delayed()->mov(G0, offset);
1901
1902 // copy 1 element to align 'to' on an 8 byte boundary
1903 __ andcc(to, 7, G0);
1904 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1905 __ delayed()->ld(from, 0, O3);
1906 __ inc(from, 4);
1907 __ inc(to, 4);
1908 __ dec(count);
1909 __ st(O3, to, -4);
1910 __ BIND(L_skip_alignment);
1911
1912 // if arrays have the same alignment mod 8, do a 4-element copy
2359 const Register from = O0; // source array address
2360 const Register to = O1; // destination array address
2361 const Register count = O2; // elements count
2362
2363 __ align(CodeEntryAlignment);
2364 StubCodeMark mark(this, "StubRoutines", name);
2365 address start = __ pc();
2366
2367 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2368
2369 if (entry != NULL) {
2370 *entry = __ pc();
2371 // caller can pass a 64-bit byte count here
2372 BLOCK_COMMENT("Entry:");
2373 }
2374
2375 // save arguments for barrier generation
2376 __ mov(to, G1);
2377 __ mov(count, G5);
2378 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2379 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2380 if (UseCompressedOops) {
2381 generate_disjoint_int_copy_core(aligned);
2382 } else {
2383 generate_disjoint_long_copy_core(aligned);
2384 }
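// With compressed oops each element is a 32-bit narrowOop, so the int copy core is
// reused; with uncompressed oops the elements are 64 bits wide and the long copy core
// is used instead.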
2385 // O0 is used as temp register
2386 gen_write_ref_array_post_barrier(G1, G5, O0);
2387
2388 // O3, O4 are used as temp registers
2389 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2390 __ retl();
2391 __ delayed()->mov(G0, O0); // return 0
2392 return start;
2393 }
2394
2395 // Generate stub for conjoint oop copy. If "aligned" is true, the
2396 // "from" and "to" addresses are assumed to be heapword aligned.
2397 //
2398 // Arguments for generated stub:
2399 // from: O0
2400 // to: O1
2401 // count: O2 treated as signed
2402 //
2403 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2404 address *entry, const char *name,
2410
2411 __ align(CodeEntryAlignment);
2412 StubCodeMark mark(this, "StubRoutines", name);
2413 address start = __ pc();
2414
2415 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2416
2417 if (entry != NULL) {
2418 *entry = __ pc();
2419 // caller can pass a 64-bit byte count here
2420 BLOCK_COMMENT("Entry:");
2421 }
2422
2423 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2424
2425 // save arguments for barrier generation
2426 __ mov(to, G1);
2427 __ mov(count, G5);
2428 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2429
2430 if (UseCompressedOops) {
2431 generate_conjoint_int_copy_core(aligned);
2432 } else {
2433 generate_conjoint_long_copy_core(aligned);
2434 }
2435
2436 // O0 is used as temp register
2437 gen_write_ref_array_post_barrier(G1, G5, O0);
2438
2439 // O3, O4 are used as temp registers
2440 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2441 __ retl();
2442 __ delayed()->mov(G0, O0); // return 0
2443 return start;
2444 }
2445
2446
2447 // Helper for generating a dynamic type check.
2448 // Smashes only the given temp registers.
2449 void generate_type_check(Register sub_klass,
2450 Register super_check_offset,
2451 Register super_klass,
2452 Register temp,
2453 Label& L_success) {
2454 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
3026 "arrayof_jbyte_arraycopy");
3027
3028 //*** jshort
3029 // Always need aligned and unaligned versions
3030 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3031 "jshort_disjoint_arraycopy");
3032 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
3033 &entry_jshort_arraycopy,
3034 "jshort_arraycopy");
3035 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3036 "arrayof_jshort_disjoint_arraycopy");
3037 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3038 "arrayof_jshort_arraycopy");
3039
3040 //*** jint
3041 // Aligned versions
3042 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3043 "arrayof_jint_disjoint_arraycopy");
3044 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3045 "arrayof_jint_arraycopy");
3046 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3047 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3048 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3049 "jint_disjoint_arraycopy");
3050 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
3051 &entry_jint_arraycopy,
3052 "jint_arraycopy");
3053
3054 //*** jlong
3055 // It is always aligned
3056 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3057 "arrayof_jlong_disjoint_arraycopy");
3058 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3059 "arrayof_jlong_arraycopy");
3060 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3061 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
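// jlong elements are always 8-byte aligned, so the "unaligned" entry points simply
// reuse the arrayof_ (aligned) stubs generated just above.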
3062
3063
3064 //*** oops
3065 // Aligned versions
3066 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
3067 "arrayof_oop_disjoint_arraycopy");
3068 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3069 "arrayof_oop_arraycopy");
3070 // Aligned versions without pre-barriers
3071 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3072 "arrayof_oop_disjoint_arraycopy_uninit",
3073 /*dest_uninitialized*/true);
3074 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
3075 "arrayof_oop_arraycopy_uninit",
3076 /*dest_uninitialized*/true);
3077 if (UseCompressedOops) {
3078 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
3079 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3080 "oop_disjoint_arraycopy");
3081 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3082 "oop_arraycopy");
3083 // Unaligned versions without pre-barriers
3084 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
3085 "oop_disjoint_arraycopy_uninit",
3086 /*dest_uninitialized*/true);
3087 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
3088 "oop_arraycopy_uninit",
3089 /*dest_uninitialized*/true);
3090 } else
3091 {
3092 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3093 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3094 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
3095 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3096 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
3097 }
3098
3099 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3100 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3101 /*dest_uninitialized*/true);
3102
3103 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3104 entry_jbyte_arraycopy,
3105 entry_jshort_arraycopy,
3106 entry_jint_arraycopy,
3107 entry_jlong_arraycopy);
3108 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3109 entry_jbyte_arraycopy,
3110 entry_jshort_arraycopy,
4981
4982 void generate_initial() {
4983 // Generates all stubs and initializes the entry points
4984
4985 //------------------------------------------------------------------------------------------------------------------------
4986 // entry points that exist in all platforms
4987 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
4988 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
4989 StubRoutines::_forward_exception_entry = generate_forward_exception();
4990
4991 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
4992 StubRoutines::_catch_exception_entry = generate_catch_exception();
4993
4994 //------------------------------------------------------------------------------------------------------------------------
4995 // entry points that are platform specific
4996 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
4997
4998 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
4999 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
5000
5001 // Build this early so it's available for the interpreter.
5002 StubRoutines::_throw_StackOverflowError_entry =
5003 generate_throw_exception("StackOverflowError throw_exception",
5004 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5005 StubRoutines::_throw_delayed_StackOverflowError_entry =
5006 generate_throw_exception("delayed StackOverflowError throw_exception",
5007 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5008
5009 if (UseCRC32Intrinsics) {
5010 // set table address before stub generation, which uses it
5011 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5012 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5013 }
5014
5015 if (UseCRC32CIntrinsics) {
5016 // set table address before stub generation, which uses it
5017 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5018 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5019 }
5020 }
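// generate_initial() builds only the stubs needed early during VM startup (the call
// stub, exception handling, and the CRC tables/stubs above); the remaining stubs are
// produced later by generate_all(), selected via the 'all' flag below.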
5088 _stub_count = !all ? 0x100 : 0x200;
5089 if (all) {
5090 generate_all();
5091 } else {
5092 generate_initial();
5093 }
5094
5095 // make sure this stub is available for all local calls
5096 if (_atomic_add_stub.is_unbound()) {
5097 // generate a second time, if necessary
5098 (void) generate_atomic_add();
5099 }
5100 }
5101
5102
5103 private:
5104 int _stub_count;
5105 void stub_prolog(StubCodeDesc* cdesc) {
5106 # ifdef ASSERT
5107 // put extra information in the stub code, to make it more readable
5108 // Write the high part of the address
5109 // [RGV] Check if there is a dependency on the size of this prolog
5110 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
5111 __ emit_data((intptr_t)cdesc, relocInfo::none);
5112 __ emit_data(++_stub_count, relocInfo::none);
5113 # endif
5114 align(true);
5115 }
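// In debug builds the prolog above embeds the StubCodeDesc address (as two 32-bit
// halves) and a running stub count ahead of each stub, which helps identify stubs when
// examining the generated code.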
5116
5117 void align(bool at_header = false) {
5118 // %%%%% move this constant somewhere else
5119 // UltraSPARC cache line size is 8 instructions:
5120 const unsigned int icache_line_size = 32;
5121 const unsigned int icache_half_line_size = 16;
5122
5123 if (at_header) {
5124 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5125 __ emit_data(0, relocInfo::none);
5126 }
5127 } else {
5128 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5129 __ nop();
5130 }
|