199 __ br(Assembler::greater, false, Assembler::pt, loop); 200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst); 201 202 // done 203 __ BIND(exit); 204 } 205 206 // setup parameters, method & call Java function 207 #ifdef ASSERT 208 // layout_activation_impl checks its notion of saved SP against 209 // this register, so if this changes update it as well. 210 const Register saved_SP = Lscratch; 211 __ mov(SP, saved_SP); // keep track of SP before call 212 #endif 213 214 // setup parameters 215 const Register t = G3_scratch; 216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words) 217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes 218 __ sub(FP, t, Gargs); // setup parameter pointer 219 #ifdef _LP64 220 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias 221 #endif 222 __ mov(SP, O5_savedSP); 223 224 225 // do the call 226 // 227 // the following registers must be set up: 228 // 229 // G2_thread 230 // G5_method 231 // Gargs 232 BLOCK_COMMENT("call Java function"); 233 __ jmpl(entry_point.as_in().as_register(), G0, O7); 234 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method 235 236 BLOCK_COMMENT("call_stub_return_address:"); 237 return_pc = __ pc(); 238 239 // The callee, if it wasn't interpreted, can return with SP changed so 240 // we can no longer assert on the change of SP. 241 254 // store int result 255 __ st(O0, addr, G0); 256 257 __ BIND(exit); 258 __ ret(); 259 __ delayed()->restore(); 260 261 __ BIND(is_object); 262 __ ba(exit); 263 __ delayed()->st_ptr(O0, addr, G0); 264 265 __ BIND(is_float); 266 __ ba(exit); 267 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); 268 269 __ BIND(is_double); 270 __ ba(exit); 271 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); 272 273 __ BIND(is_long); 274 #ifdef _LP64 275 __ ba(exit); 276 __ delayed()->st_long(O0, addr, G0); // store entire long 277 #else 278 #if defined(COMPILER2) 279 // All return values are where we want them, except for Longs. C2 returns 280 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. 281 // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit 282 // build, we simply always use G1. 283 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to 284 // do this here. Unfortunately if we did a rethrow we'd see a machepilog node 285 // first which would move g1 -> O0/O1 and destroy the exception we were throwing. 286 287 __ ba(exit); 288 __ delayed()->stx(G1, addr, G0); // store entire long 289 #else 290 __ st(O1, addr, BytesPerInt); 291 __ ba(exit); 292 __ delayed()->st(O0, addr, G0); 293 #endif /* COMPILER2 */ 294 #endif /* _LP64 */ 295 } 296 return start; 297 } 298 299 300 //---------------------------------------------------------------------------------------------------- 301 // Return point for a Java call if there's an exception thrown in Java code. 302 // The exception is caught and transformed into a pending exception stored in 303 // JavaThread that can be tested from within the VM.
304 // 305 // Oexception: exception oop 306 307 address generate_catch_exception() { 308 StubCodeMark mark(this, "StubRoutines", "catch_exception"); 309 310 address start = __ pc(); 311 // verify that thread corresponds 312 __ verify_thread(); 313 314 const Register& temp_reg = Gtemp; 729 730 return start; 731 } 732 Label _atomic_add_stub; // called from other stubs 733 734 735 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super ); 736 // Arguments : 737 // 738 // ret : O0, returned 739 // icc/xcc: set as O0 (depending on wordSize) 740 // sub : O1, argument, not changed 741 // super: O2, argument, not changed 742 // raddr: O7, blown by call 743 address generate_partial_subtype_check() { 744 __ align(CodeEntryAlignment); 745 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); 746 address start = __ pc(); 747 Label miss; 748 749 #if defined(COMPILER2) && !defined(_LP64) 750 // Do not use a 'save' because it blows the 64-bit O registers. 751 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) 752 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); 753 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize); 754 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize); 755 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize); 756 Register Rret = O0; 757 Register Rsub = O1; 758 Register Rsuper = O2; 759 #else 760 __ save_frame(0); 761 Register Rret = I0; 762 Register Rsub = I1; 763 Register Rsuper = I2; 764 #endif 765 766 Register L0_ary_len = L0; 767 Register L1_ary_ptr = L1; 768 Register L2_super = L2; 769 Register L3_index = L3; 770 771 __ check_klass_subtype_slow_path(Rsub, Rsuper, 772 L0, L1, L2, L3, 773 NULL, &miss); 774 775 // Match falls through here. 776 __ addcc(G0,0,Rret); // set Z flags, Z result 777 778 #if defined(COMPILER2) && !defined(_LP64) 779 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); 780 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); 781 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); 782 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3); 783 __ retl(); // Result in Rret is zero; flags set to Z 784 __ delayed()->add(SP,4*wordSize,SP); 785 #else 786 __ ret(); // Result in Rret is zero; flags set to Z 787 __ delayed()->restore(); 788 #endif 789 790 __ BIND(miss); 791 __ addcc(G0,1,Rret); // set NZ flags, NZ result 792 793 #if defined(COMPILER2) && !defined(_LP64) 794 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); 795 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); 796 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); 797 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3); 798 __ retl(); // Result in Rret is != 0; flags set to NZ 799 __ delayed()->add(SP,4*wordSize,SP); 800 #else 801 __ ret(); // Result in Rret is != 0; flags set to NZ 802 __ delayed()->restore(); 803 #endif 804 805 return start; 806 } 807 808 809 // Called from MacroAssembler::verify_oop 810 // 811 address generate_verify_oop_subroutine() { 812 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); 813 814 address start = __ pc(); 815 816 __ verify_oop_subroutine(); 817 818 return start; 819 } 820 821 822 // 823 // Verify that a register contains clean 32-bits positive value 824 // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax). 
825 // 826 // Input: 827 // Rint - 32-bits value 828 // Rtmp - scratch 829 // 830 void assert_clean_int(Register Rint, Register Rtmp) { 831 #if defined(ASSERT) && defined(_LP64) 832 __ signx(Rint, Rtmp); 833 __ cmp(Rint, Rtmp); 834 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc); 835 #endif 836 } 837 838 // 839 // Generate overlap test for array copy stubs 840 // 841 // Input: 842 // O0 - array1 843 // O1 - array2 844 // O2 - element count 845 // 846 // Kills temps: O3, O4 847 // 848 void array_overlap_test(address no_overlap_target, int log2_elem_size) { 849 assert(no_overlap_target != NULL, "must be generated"); 850 array_overlap_test(no_overlap_target, NULL, log2_elem_size); 851 } 852 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) { 853 array_overlap_test(NULL, &L_no_overlap, log2_elem_size); 854 } 855 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) { 1252 1253 if (entry != NULL) { 1254 *entry = __ pc(); 1255 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1256 BLOCK_COMMENT("Entry:"); 1257 } 1258 1259 // for short arrays, just do single element copy 1260 __ cmp(count, 23); // 16 + 7 1261 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); 1262 __ delayed()->mov(G0, offset); 1263 1264 if (aligned) { 1265 // 'aligned' == true when it is known statically during compilation 1266 // of this arraycopy call site that both 'from' and 'to' addresses 1267 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 1268 // 1269 // Aligned arrays have 4-byte alignment in the 32-bit VM 1270 // and 8-byte alignment in the 64-bit VM. So we do it only for the 32-bit VM 1271 // 1272 #ifndef _LP64 1273 // copy a 4-byte word if necessary to align 'to' to 8 bytes 1274 __ andcc(to, 7, G0); 1275 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment); 1276 __ delayed()->ld(from, 0, O3); 1277 __ inc(from, 4); 1278 __ inc(to, 4); 1279 __ dec(count, 4); 1280 __ st(O3, to, -4); 1281 __ BIND(L_skip_alignment); 1282 #endif 1283 } else { 1284 // copy bytes to align 'to' on 8 byte boundary 1285 __ andcc(to, 7, G1); // misaligned bytes 1286 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1287 __ delayed()->neg(G1); 1288 __ inc(G1, 8); // bytes needed to reach the next 8-byte boundary 1289 __ sub(count, G1, count); 1290 __ BIND(L_align); 1291 __ ldub(from, 0, O3); 1292 __ deccc(G1); 1293 __ inc(from); 1294 __ stb(O3, to, 0); 1295 __ br(Assembler::notZero, false, Assembler::pt, L_align); 1296 __ delayed()->inc(to); 1297 __ BIND(L_skip_alignment); 1298 } 1299 #ifdef _LP64 1300 if (!aligned) 1301 #endif 1302 { 1303 // Copy with shift 16 bytes per iteration if arrays do not have 1304 // the same alignment mod 8, otherwise fall through to the next 1305 // code for aligned copy. 1306 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. 1307 // Also jump over the aligned copy after the copy with shift has completed.
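// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the core idea behind
// copy_16_bytes_forward_with_shift(), whose call site follows. When 'to' is
// 8-byte aligned but 'from' is not, each aligned destination word is built
// from two adjacent aligned source words with a shift/or pair (sllx/srlx/or
// in the generated code). Big-endian (SPARC) byte order and
// 0 < misalign_bytes < 8 are assumed; all names here are hypothetical.
#include <cstddef>
#include <cstdint>

static void copy_with_shift_sketch(const uint64_t* from_aligned, uint64_t* to,
                                   size_t nwords, unsigned misalign_bytes) {
  const unsigned s = misalign_bytes * 8;  // shift amount in bits
  uint64_t prev = from_aligned[0];        // first aligned source word
  for (size_t i = 0; i < nwords; i++) {
    uint64_t next = from_aligned[i + 1];
    // High bytes come from 'prev', low bytes from 'next' (big-endian view).
    to[i] = (prev << s) | (next >> (64 - s));
    prev = next;
  }
}
// ---------------------------------------------------------------------------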
1308 1309 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); 1310 } 1311 1312 // Both arrays are 8-byte aligned; copy 16 bytes at a time 1313 __ and3(count, 7, G4); // Save count 1314 __ srl(count, 3, count); 1315 generate_disjoint_long_copy_core(aligned); 1316 __ mov(G4, count); // Restore count 1317 1318 // copy trailing bytes 1319 __ BIND(L_copy_byte); 1320 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1321 __ align(OptoLoopAlignment); 1378 __ delayed()->add(from, count, end_from); 1379 1380 { 1381 // Align the end of the arrays since they may not be aligned even 1382 // when the arrays themselves are aligned. 1383 1384 // copy bytes to align 'end_to' on 8 byte boundary 1385 __ andcc(end_to, 7, G1); // misaligned bytes 1386 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1387 __ delayed()->nop(); 1388 __ sub(count, G1, count); 1389 __ BIND(L_align); 1390 __ dec(end_from); 1391 __ dec(end_to); 1392 __ ldub(end_from, 0, O3); 1393 __ deccc(G1); 1394 __ brx(Assembler::notZero, false, Assembler::pt, L_align); 1395 __ delayed()->stb(O3, end_to, 0); 1396 __ BIND(L_skip_alignment); 1397 } 1398 #ifdef _LP64 1399 if (aligned) { 1400 // Both arrays are aligned to 8 bytes in the 64-bit VM. 1401 // The 'count' is decremented in copy_16_bytes_backward_with_shift() 1402 // in unaligned case. 1403 __ dec(count, 16); 1404 } else 1405 #endif 1406 { 1407 // Copy with shift 16 bytes per iteration if arrays do not have 1408 // the same alignment mod 8, otherwise jump to the next 1409 // code for aligned copy (and subtracting 16 from 'count' before jump). 1410 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1411 // Also jump over the aligned copy after the copy with shift has completed. 1412 1413 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, 1414 L_aligned_copy, L_copy_byte); 1415 } 1416 // copy 4 elements (16 bytes) at a time 1417 __ align(OptoLoopAlignment); 1418 __ BIND(L_aligned_copy); 1419 __ dec(end_from, 16); 1420 __ ldx(end_from, 8, O3); 1421 __ ldx(end_from, 0, O4); 1422 __ dec(end_to, 16); 1423 __ deccc(count, 16); 1424 __ stx(O3, end_to, 8); 1425 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 1473 1474 if (entry != NULL) { 1475 *entry = __ pc(); 1476 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1477 BLOCK_COMMENT("Entry:"); 1478 } 1479 1480 // for short arrays, just do single element copy 1481 __ cmp(count, 11); // 8 + 3 (22 bytes) 1482 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); 1483 __ delayed()->mov(G0, offset); 1484 1485 if (aligned) { 1486 // 'aligned' == true when it is known statically during compilation 1487 // of this arraycopy call site that both 'from' and 'to' addresses 1488 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 1489 // 1490 // Aligned arrays have 4-byte alignment in the 32-bit VM 1491 // and 8-byte alignment in the 64-bit VM.
1492 // 1493 #ifndef _LP64 1494 // copy a 2-element word if necessary to align 'to' to 8 bytes 1495 __ andcc(to, 7, G0); 1496 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1497 __ delayed()->ld(from, 0, O3); 1498 __ inc(from, 4); 1499 __ inc(to, 4); 1500 __ dec(count, 2); 1501 __ st(O3, to, -4); 1502 __ BIND(L_skip_alignment); 1503 #endif 1504 } else { 1505 // copy 1 element if necessary to align 'to' on a 4-byte boundary 1506 __ andcc(to, 3, G0); 1507 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1508 __ delayed()->lduh(from, 0, O3); 1509 __ inc(from, 2); 1510 __ inc(to, 2); 1511 __ dec(count); 1512 __ sth(O3, to, -2); 1513 __ BIND(L_skip_alignment); 1514 1515 // copy 2 elements to align 'to' on an 8 byte boundary 1516 __ andcc(to, 7, G0); 1517 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); 1518 __ delayed()->lduh(from, 0, O3); 1519 __ dec(count, 2); 1520 __ lduh(from, 2, O4); 1521 __ inc(from, 4); 1522 __ inc(to, 4); 1523 __ sth(O3, to, -4); 1524 __ sth(O4, to, -2); 1525 __ BIND(L_skip_alignment2); 1526 } 1527 #ifdef _LP64 1528 if (!aligned) 1529 #endif 1530 { 1531 // Copy with shift 16 bytes per iteration if arrays do not have 1532 // the same alignment mod 8, otherwise fall through to the next 1533 // code for aligned copy. 1534 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1535 // Also jump over the aligned copy after the copy with shift has completed. 1536 1537 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); 1538 } 1539 1540 // Both arrays are 8-byte aligned; copy 16 bytes at a time 1541 __ and3(count, 3, G4); // Save 1542 __ srl(count, 2, count); 1543 generate_disjoint_long_copy_core(aligned); 1544 __ mov(G4, count); // restore 1545 1546 // copy 1 element at a time 1547 __ BIND(L_copy_2_bytes); 1548 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1549 __ align(OptoLoopAlignment); 1626 // align destination address at a 4-byte boundary 1627 if (t == T_BYTE) { 1628 // One byte misalignment happens only for byte arrays 1629 __ andcc(to, 1, G0); 1630 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); 1631 __ delayed()->nop(); 1632 __ stb(value, to, 0); 1633 __ inc(to, 1); 1634 __ dec(count, 1); 1635 __ BIND(L_skip_align1); 1636 } 1637 // Two bytes misalignment happens only for byte and short (char) arrays 1638 __ andcc(to, 2, G0); 1639 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); 1640 __ delayed()->nop(); 1641 __ sth(value, to, 0); 1642 __ inc(to, 2); 1643 __ dec(count, 1 << (shift - 1)); 1644 __ BIND(L_skip_align2); 1645 } 1646 #ifdef _LP64 1647 if (!aligned) { 1648 #endif 1649 // align to 8 bytes, we know we are 4 byte aligned to start 1650 __ andcc(to, 7, G0); 1651 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes); 1652 __ delayed()->nop(); 1653 __ stw(value, to, 0); 1654 __ inc(to, 4); 1655 __ dec(count, 1 << shift); 1656 __ BIND(L_fill_32_bytes); 1657 #ifdef _LP64 1658 } 1659 #endif 1660 1661 if (t == T_INT) { 1662 // Zero extend value 1663 __ srl(value, 0, value); 1664 } 1665 if (t == T_BYTE || t == T_SHORT || t == T_INT) { 1666 __ sllx(value, 32, O3); 1667 __ or3(value, O3, value); 1668 } 1669 1670 Label L_check_fill_8_bytes; 1671 // Fill 32-byte chunks 1672 __ subcc(count, 8 << shift, count); 1673 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); 1674 __ delayed()->nop(); 1675 1676 Label L_fill_32_bytes_loop, L_fill_4_bytes; 1677 __ align(16); 1678 __ BIND(L_fill_32_bytes_loop); 1679 1840 __ br(Assembler::zero, false,
Assembler::pt, L_skip_alignment); 1841 __ delayed()->lduh(end_from, -2, O3); 1842 __ dec(end_from, 2); 1843 __ dec(end_to, 2); 1844 __ dec(count); 1845 __ sth(O3, end_to, 0); 1846 __ BIND(L_skip_alignment); 1847 1848 // copy 2 elements to align 'end_to' on an 8 byte boundary 1849 __ andcc(end_to, 7, G0); 1850 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); 1851 __ delayed()->lduh(end_from, -2, O3); 1852 __ dec(count, 2); 1853 __ lduh(end_from, -4, O4); 1854 __ dec(end_from, 4); 1855 __ dec(end_to, 4); 1856 __ sth(O3, end_to, 2); 1857 __ sth(O4, end_to, 0); 1858 __ BIND(L_skip_alignment2); 1859 } 1860 #ifdef _LP64 1861 if (aligned) { 1862 // Both arrays are aligned to 8 bytes in the 64-bit VM. 1863 // The 'count' is decremented in copy_16_bytes_backward_with_shift() 1864 // in unaligned case. 1865 __ dec(count, 8); 1866 } else 1867 #endif 1868 { 1869 // Copy with shift 16 bytes per iteration if arrays do not have 1870 // the same alignment mod 8, otherwise jump to the next 1871 // code for aligned copy (and subtracting 8 from 'count' before jump). 1872 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1873 // Also jump over the aligned copy after the copy with shift has completed. 1874 1875 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, 1876 L_aligned_copy, L_copy_2_bytes); 1877 } 1878 // copy 4 elements (16 bytes) at a time 1879 __ align(OptoLoopAlignment); 1880 __ BIND(L_aligned_copy); 1881 __ dec(end_from, 16); 1882 __ ldx(end_from, 8, O3); 1883 __ ldx(end_from, 0, O4); 1884 __ dec(end_to, 16); 1885 __ deccc(count, 8); 1886 __ stx(O3, end_to, 8); 1887 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 1957 // count: O2 treated as signed 1958 // 1959 void generate_disjoint_int_copy_core(bool aligned) { 1960 1961 Label L_skip_alignment, L_aligned_copy; 1962 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; 1963 1964 const Register from = O0; // source array address 1965 const Register to = O1; // destination array address 1966 const Register count = O2; // elements count 1967 const Register offset = O5; // offset from start of arrays 1968 // O3, O4, G3, G4 are used as temp registers 1969 1970 // 'aligned' == true when it is known statically during compilation 1971 // of this arraycopy call site that both 'from' and 'to' addresses 1972 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 1973 // 1974 // Aligned arrays have 4-byte alignment in the 32-bit VM 1975 // and 8-byte alignment in the 64-bit VM. 1976 // 1977 #ifdef _LP64 1978 if (!aligned) 1979 #endif 1980 { 1981 // The next check could be put under 'ifndef' since the code in 1982 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
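// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the bookkeeping that
// the alignment pre-loops in these copy routines perform. The neg(G1) /
// inc(G1, 8) pair in the byte copy computes 8 - (to & 7), i.e. how many
// leading elements must be copied one at a time before 'to' reaches an
// 8-byte boundary. The helper name is hypothetical.
#include <cstddef>
#include <cstdint>

static size_t bytes_until_8_aligned_sketch(const void* to) {
  uintptr_t mis = (uintptr_t)to & 7;  // andcc(to, 7, G1): misaligned bytes
  return mis == 0 ? 0 : 8 - mis;      // delayed()->neg(G1); inc(G1, 8)
}
// ---------------------------------------------------------------------------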
1983 1984 // for short arrays, just do single element copy 1985 __ cmp(count, 5); // 4 + 1 (20 bytes) 1986 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); 1987 __ delayed()->mov(G0, offset); 1988 1989 // copy 1 element to align 'to' on an 8 byte boundary 1990 __ andcc(to, 7, G0); 1991 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1992 __ delayed()->ld(from, 0, O3); 1993 __ inc(from, 4); 1994 __ inc(to, 4); 1995 __ dec(count); 1996 __ st(O3, to, -4); 1997 __ BIND(L_skip_alignment); 1998 1999 // if arrays have same alignment mod 8, do 4 elements copy 2446 const Register from = O0; // source array address 2447 const Register to = O1; // destination array address 2448 const Register count = O2; // elements count 2449 2450 __ align(CodeEntryAlignment); 2451 StubCodeMark mark(this, "StubRoutines", name); 2452 address start = __ pc(); 2453 2454 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2455 2456 if (entry != NULL) { 2457 *entry = __ pc(); 2458 // caller can pass a 64-bit byte count here 2459 BLOCK_COMMENT("Entry:"); 2460 } 2461 2462 // save arguments for barrier generation 2463 __ mov(to, G1); 2464 __ mov(count, G5); 2465 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2466 #ifdef _LP64 2467 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2468 if (UseCompressedOops) { 2469 generate_disjoint_int_copy_core(aligned); 2470 } else { 2471 generate_disjoint_long_copy_core(aligned); 2472 } 2473 #else 2474 generate_disjoint_int_copy_core(aligned); 2475 #endif 2476 // O0 is used as temp register 2477 gen_write_ref_array_post_barrier(G1, G5, O0); 2478 2479 // O3, O4 are used as temp registers 2480 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2481 __ retl(); 2482 __ delayed()->mov(G0, O0); // return 0 2483 return start; 2484 } 2485 2486 // Generate stub for conjoint oop copy. If "aligned" is true, the 2487 // "from" and "to" addresses are assumed to be heapword aligned. 2488 // 2489 // Arguments for generated stub: 2490 // from: O0 2491 // to: O1 2492 // count: O2 treated as signed 2493 // 2494 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, 2495 address *entry, const char *name, 2501 2502 __ align(CodeEntryAlignment); 2503 StubCodeMark mark(this, "StubRoutines", name); 2504 address start = __ pc(); 2505 2506 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2507 2508 if (entry != NULL) { 2509 *entry = __ pc(); 2510 // caller can pass a 64-bit byte count here 2511 BLOCK_COMMENT("Entry:"); 2512 } 2513 2514 array_overlap_test(nooverlap_target, LogBytesPerHeapOop); 2515 2516 // save arguments for barrier generation 2517 __ mov(to, G1); 2518 __ mov(count, G5); 2519 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2520 2521 #ifdef _LP64 2522 if (UseCompressedOops) { 2523 generate_conjoint_int_copy_core(aligned); 2524 } else { 2525 generate_conjoint_long_copy_core(aligned); 2526 } 2527 #else 2528 generate_conjoint_int_copy_core(aligned); 2529 #endif 2530 2531 // O0 is used as temp register 2532 gen_write_ref_array_post_barrier(G1, G5, O0); 2533 2534 // O3, O4 are used as temp registers 2535 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2536 __ retl(); 2537 __ delayed()->mov(G0, O0); // return 0 2538 return start; 2539 } 2540 2541 2542 // Helper for generating a dynamic type check. 2543 // Smashes only the given temp registers. 
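// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the fast path that a
// HotSpot dynamic type check of this shape usually begins with. The word at
// 'super_check_offset' inside the subclass Klass is compared against the
// superclass; equality means success, and a miss falls through to a slow
// path such as partial_subtype_check() above. The layout access below is a
// simplified assumption, not the real Klass definition.
#include <cstddef>

struct Klass;  // opaque stand-in for the VM's Klass

static bool fast_subtype_check_sketch(const Klass* sub, const Klass* super,
                                      size_t super_check_offset) {
  const Klass* probe =
      *(const Klass* const*)((const char*)sub + super_check_offset);
  return probe == super;  // miss => slow path
}
// ---------------------------------------------------------------------------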
2544 void generate_type_check(Register sub_klass, 2545 Register super_check_offset, 2546 Register super_klass, 2547 Register temp, 2548 Label& L_success) { 2549 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); 3121 "arrayof_jbyte_arraycopy"); 3122 3123 //*** jshort 3124 // Always need aligned and unaligned versions 3125 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, 3126 "jshort_disjoint_arraycopy"); 3127 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, 3128 &entry_jshort_arraycopy, 3129 "jshort_arraycopy"); 3130 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, 3131 "arrayof_jshort_disjoint_arraycopy"); 3132 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, 3133 "arrayof_jshort_arraycopy"); 3134 3135 //*** jint 3136 // Aligned versions 3137 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, 3138 "arrayof_jint_disjoint_arraycopy"); 3139 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, 3140 "arrayof_jint_arraycopy"); 3141 #ifdef _LP64 3142 // In 64 bit we need both aligned and unaligned versions of jint arraycopy. 3143 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it). 3144 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, 3145 "jint_disjoint_arraycopy"); 3146 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, 3147 &entry_jint_arraycopy, 3148 "jint_arraycopy"); 3149 #else 3150 // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version 3151 // (in fact in 32bit we always have a pre-loop part even in the aligned version, 3152 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored). 3153 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy; 3154 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy; 3155 #endif 3156 3157 3158 //*** jlong 3159 // It is always aligned 3160 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, 3161 "arrayof_jlong_disjoint_arraycopy"); 3162 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, 3163 "arrayof_jlong_arraycopy"); 3164 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; 3165 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; 3166 3167 3168 //*** oops 3169 // Aligned versions 3170 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry, 3171 "arrayof_oop_disjoint_arraycopy"); 3172 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy, 3173 "arrayof_oop_arraycopy"); 3174 // Aligned versions without pre-barriers 3175 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry, 3176 "arrayof_oop_disjoint_arraycopy_uninit", 3177 /*dest_uninitialized*/true); 3178 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL, 3179 "arrayof_oop_arraycopy_uninit", 3180 /*dest_uninitialized*/true); 3181 #ifdef _LP64 3182 if (UseCompressedOops) { 3183 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy. 
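// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): why compressed oops
// force unaligned oop arraycopy entries. A narrowOop element is 4 bytes, so
// an element address is only guaranteed 4-byte aligned, while uncompressed
// oops are full 8-byte words; the same distinction drives the int-core vs.
// long-core dispatch in generate_disjoint_oop_copy(). The helper name is
// hypothetical.
#include <cstddef>
#include <cstdint>

static size_t oop_element_size_sketch(bool use_compressed_oops) {
  return use_compressed_oops ? sizeof(uint32_t)   // narrowOop: int copy core
                             : sizeof(uint64_t);  // full oop:  long copy core
}
// ---------------------------------------------------------------------------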
3184 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry, 3185 "oop_disjoint_arraycopy"); 3186 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy, 3187 "oop_arraycopy"); 3188 // Unaligned versions without pre-barriers 3189 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry, 3190 "oop_disjoint_arraycopy_uninit", 3191 /*dest_uninitialized*/true); 3192 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL, 3193 "oop_arraycopy_uninit", 3194 /*dest_uninitialized*/true); 3195 } else 3196 #endif 3197 { 3198 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops 3199 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; 3200 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; 3201 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; 3202 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; 3203 } 3204 3205 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 3206 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, 3207 /*dest_uninitialized*/true); 3208 3209 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 3210 entry_jbyte_arraycopy, 3211 entry_jshort_arraycopy, 3212 entry_jint_arraycopy, 3213 entry_jlong_arraycopy); 3214 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", 3215 entry_jbyte_arraycopy, 3216 entry_jshort_arraycopy, 5087 5088 void generate_initial() { 5089 // Generates all stubs and initializes the entry points 5090 5091 //------------------------------------------------------------------------------------------------------------------------ 5092 // entry points that exist in all platforms 5093 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than 5094 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. 
5095 StubRoutines::_forward_exception_entry = generate_forward_exception(); 5096 5097 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); 5098 StubRoutines::_catch_exception_entry = generate_catch_exception(); 5099 5100 //------------------------------------------------------------------------------------------------------------------------ 5101 // entry points that are platform specific 5102 StubRoutines::Sparc::_test_stop_entry = generate_test_stop(); 5103 5104 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine(); 5105 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows(); 5106 5107 #if !defined(COMPILER2) && !defined(_LP64) 5108 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); 5109 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); 5110 StubRoutines::_atomic_add_entry = generate_atomic_add(); 5111 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry; 5112 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry; 5113 StubRoutines::_atomic_cmpxchg_byte_entry = ShouldNotCallThisStub(); 5114 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); 5115 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry; 5116 #endif // COMPILER2 !=> _LP64 5117 5118 // Build this early so it's available for the interpreter. 5119 StubRoutines::_throw_StackOverflowError_entry = 5120 generate_throw_exception("StackOverflowError throw_exception", 5121 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); 5122 StubRoutines::_throw_delayed_StackOverflowError_entry = 5123 generate_throw_exception("delayed StackOverflowError throw_exception", 5124 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError)); 5125 5126 if (UseCRC32Intrinsics) { 5127 // set table address before stub generation, which uses it 5128 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table; 5129 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); 5130 } 5131 5132 if (UseCRC32CIntrinsics) { 5133 // set table address before stub generation, which uses it 5134 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table; 5135 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); 5136 } 5137 } 5205 _stub_count = !all ?
0x100 : 0x200; 5206 if (all) { 5207 generate_all(); 5208 } else { 5209 generate_initial(); 5210 } 5211 5212 // make sure this stub is available for all local calls 5213 if (_atomic_add_stub.is_unbound()) { 5214 // generate a second time, if necessary 5215 (void) generate_atomic_add(); 5216 } 5217 } 5218 5219 5220 private: 5221 int _stub_count; 5222 void stub_prolog(StubCodeDesc* cdesc) { 5223 # ifdef ASSERT 5224 // put extra information in the stub code, to make it more readable 5225 #ifdef _LP64 5226 // Write the high part of the address 5227 // [RGV] Check if there is a dependency on the size of this prolog 5228 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none); 5229 #endif 5230 __ emit_data((intptr_t)cdesc, relocInfo::none); 5231 __ emit_data(++_stub_count, relocInfo::none); 5232 # endif 5233 align(true); 5234 } 5235 5236 void align(bool at_header = false) { 5237 // %%%%% move this constant somewhere else 5238 // UltraSPARC cache line size is 8 instructions: 5239 const unsigned int icache_line_size = 32; 5240 const unsigned int icache_half_line_size = 16; 5241 5242 if (at_header) { 5243 while ((intptr_t)(__ pc()) % icache_line_size != 0) { 5244 __ emit_data(0, relocInfo::none); 5245 } 5246 } else { 5247 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { 5248 __ nop(); 5249 } | 199 __ br(Assembler::greater, false, Assembler::pt, loop); 200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst); 201 202 // done 203 __ BIND(exit); 204 } 205 206 // setup parameters, method & call Java function 207 #ifdef ASSERT 208 // layout_activation_impl checks it's notion of saved SP against 209 // this register, so if this changes update it as well. 210 const Register saved_SP = Lscratch; 211 __ mov(SP, saved_SP); // keep track of SP before call 212 #endif 213 214 // setup parameters 215 const Register t = G3_scratch; 216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words) 217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes 218 __ sub(FP, t, Gargs); // setup parameter pointer 219 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias 220 __ mov(SP, O5_savedSP); 221 222 223 // do the call 224 // 225 // the following register must be setup: 226 // 227 // G2_thread 228 // G5_method 229 // Gargs 230 BLOCK_COMMENT("call Java function"); 231 __ jmpl(entry_point.as_in().as_register(), G0, O7); 232 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method 233 234 BLOCK_COMMENT("call_stub_return_address:"); 235 return_pc = __ pc(); 236 237 // The callee, if it wasn't interpreted, can return with SP changed so 238 // we can no longer assert of change of SP. 239 252 // store int result 253 __ st(O0, addr, G0); 254 255 __ BIND(exit); 256 __ ret(); 257 __ delayed()->restore(); 258 259 __ BIND(is_object); 260 __ ba(exit); 261 __ delayed()->st_ptr(O0, addr, G0); 262 263 __ BIND(is_float); 264 __ ba(exit); 265 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0); 266 267 __ BIND(is_double); 268 __ ba(exit); 269 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0); 270 271 __ BIND(is_long); 272 __ ba(exit); 273 __ delayed()->st_long(O0, addr, G0); // store entire long 274 } 275 return start; 276 } 277 278 279 //---------------------------------------------------------------------------------------------------- 280 // Return point for a Java call if there's an exception thrown in Java code. 
281 // The exception is caught and transformed into a pending exception stored in 282 // JavaThread that can be tested from within the VM. 283 // 284 // Oexception: exception oop 285 286 address generate_catch_exception() { 287 StubCodeMark mark(this, "StubRoutines", "catch_exception"); 288 289 address start = __ pc(); 290 // verify that thread corresponds 291 __ verify_thread(); 292 293 const Register& temp_reg = Gtemp; 708 709 return start; 710 } 711 Label _atomic_add_stub; // called from other stubs 712 713 714 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super ); 715 // Arguments : 716 // 717 // ret : O0, returned 718 // icc/xcc: set as O0 (depending on wordSize) 719 // sub : O1, argument, not changed 720 // super: O2, argument, not changed 721 // raddr: O7, blown by call 722 address generate_partial_subtype_check() { 723 __ align(CodeEntryAlignment); 724 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); 725 address start = __ pc(); 726 Label miss; 727 728 __ save_frame(0); 729 Register Rret = I0; 730 Register Rsub = I1; 731 Register Rsuper = I2; 732 733 Register L0_ary_len = L0; 734 Register L1_ary_ptr = L1; 735 Register L2_super = L2; 736 Register L3_index = L3; 737 738 __ check_klass_subtype_slow_path(Rsub, Rsuper, 739 L0, L1, L2, L3, 740 NULL, &miss); 741 742 // Match falls through here. 743 __ addcc(G0,0,Rret); // set Z flags, Z result 744 745 __ ret(); // Result in Rret is zero; flags set to Z 746 __ delayed()->restore(); 747 748 __ BIND(miss); 749 __ addcc(G0,1,Rret); // set NZ flags, NZ result 750 751 __ ret(); // Result in Rret is != 0; flags set to NZ 752 __ delayed()->restore(); 753 754 return start; 755 } 756 757 758 // Called from MacroAssembler::verify_oop 759 // 760 address generate_verify_oop_subroutine() { 761 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); 762 763 address start = __ pc(); 764 765 __ verify_oop_subroutine(); 766 767 return start; 768 } 769 770 771 // 772 // Verify that a register contains clean 32-bits positive value 773 // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax). 774 // 775 // Input: 776 // Rint - 32-bits value 777 // Rtmp - scratch 778 // 779 void assert_clean_int(Register Rint, Register Rtmp) { 780 } 781 782 // 783 // Generate overlap test for array copy stubs 784 // 785 // Input: 786 // O0 - array1 787 // O1 - array2 788 // O2 - element count 789 // 790 // Kills temps: O3, O4 791 // 792 void array_overlap_test(address no_overlap_target, int log2_elem_size) { 793 assert(no_overlap_target != NULL, "must be generated"); 794 array_overlap_test(no_overlap_target, NULL, log2_elem_size); 795 } 796 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) { 797 array_overlap_test(NULL, &L_no_overlap, log2_elem_size); 798 } 799 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) { 1196 1197 if (entry != NULL) { 1198 *entry = __ pc(); 1199 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1200 BLOCK_COMMENT("Entry:"); 1201 } 1202 1203 // for short arrays, just do single element copy 1204 __ cmp(count, 23); // 16 + 7 1205 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); 1206 __ delayed()->mov(G0, offset); 1207 1208 if (aligned) { 1209 // 'aligned' == true when it is known statically during compilation 1210 // of this arraycopy call site that both 'from' and 'to' addresses 1211 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 
1212 // 1213 // Aligned arrays have 4-byte alignment in the 32-bit VM 1214 // and 8-byte alignment in the 64-bit VM. So we do it only for the 32-bit VM 1215 // 1216 } else { 1217 // copy bytes to align 'to' on 8 byte boundary 1218 __ andcc(to, 7, G1); // misaligned bytes 1219 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1220 __ delayed()->neg(G1); 1221 __ inc(G1, 8); // bytes needed to reach the next 8-byte boundary 1222 __ sub(count, G1, count); 1223 __ BIND(L_align); 1224 __ ldub(from, 0, O3); 1225 __ deccc(G1); 1226 __ inc(from); 1227 __ stb(O3, to, 0); 1228 __ br(Assembler::notZero, false, Assembler::pt, L_align); 1229 __ delayed()->inc(to); 1230 __ BIND(L_skip_alignment); 1231 } 1232 if (!aligned) 1233 { 1234 // Copy with shift 16 bytes per iteration if arrays do not have 1235 // the same alignment mod 8, otherwise fall through to the next 1236 // code for aligned copy. 1237 // The compare above (count >= 23) guarantees 'count' >= 16 bytes. 1238 // Also jump over the aligned copy after the copy with shift has completed. 1239 1240 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); 1241 } 1242 1243 // Both arrays are 8-byte aligned; copy 16 bytes at a time 1244 __ and3(count, 7, G4); // Save count 1245 __ srl(count, 3, count); 1246 generate_disjoint_long_copy_core(aligned); 1247 __ mov(G4, count); // Restore count 1248 1249 // copy trailing bytes 1250 __ BIND(L_copy_byte); 1251 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1252 __ align(OptoLoopAlignment); 1309 __ delayed()->add(from, count, end_from); 1310 1311 { 1312 // Align the end of the arrays since they may not be aligned even 1313 // when the arrays themselves are aligned. 1314 1315 // copy bytes to align 'end_to' on 8 byte boundary 1316 __ andcc(end_to, 7, G1); // misaligned bytes 1317 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1318 __ delayed()->nop(); 1319 __ sub(count, G1, count); 1320 __ BIND(L_align); 1321 __ dec(end_from); 1322 __ dec(end_to); 1323 __ ldub(end_from, 0, O3); 1324 __ deccc(G1); 1325 __ brx(Assembler::notZero, false, Assembler::pt, L_align); 1326 __ delayed()->stb(O3, end_to, 0); 1327 __ BIND(L_skip_alignment); 1328 } 1329 if (aligned) { 1330 // Both arrays are aligned to 8 bytes in the 64-bit VM. 1331 // The 'count' is decremented in copy_16_bytes_backward_with_shift() 1332 // in unaligned case. 1333 __ dec(count, 16); 1334 } else 1335 { 1336 // Copy with shift 16 bytes per iteration if arrays do not have 1337 // the same alignment mod 8, otherwise jump to the next 1338 // code for aligned copy (and subtracting 16 from 'count' before jump). 1339 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1340 // Also jump over the aligned copy after the copy with shift has completed.
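// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the count split done
// above with and3(count, 7, G4) / srl(count, 3, count). The byte count is
// divided into 8-byte words for the long copy core plus a tail copied one
// byte at a time after the core returns. The helper name is hypothetical.
#include <cstddef>

static void split_byte_count_sketch(size_t count, size_t* words, size_t* tail) {
  *tail  = count & 7;   // and3(count, 7, G4): saved for the tail loop
  *words = count >> 3;  // srl(count, 3, count): words for the copy core
}
// ---------------------------------------------------------------------------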
1341 1342 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, 1343 L_aligned_copy, L_copy_byte); 1344 } 1345 // copy 4 elements (16 bytes) at a time 1346 __ align(OptoLoopAlignment); 1347 __ BIND(L_aligned_copy); 1348 __ dec(end_from, 16); 1349 __ ldx(end_from, 8, O3); 1350 __ ldx(end_from, 0, O4); 1351 __ dec(end_to, 16); 1352 __ deccc(count, 16); 1353 __ stx(O3, end_to, 8); 1354 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 1402 1403 if (entry != NULL) { 1404 *entry = __ pc(); 1405 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1406 BLOCK_COMMENT("Entry:"); 1407 } 1408 1409 // for short arrays, just do single element copy 1410 __ cmp(count, 11); // 8 + 3 (22 bytes) 1411 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); 1412 __ delayed()->mov(G0, offset); 1413 1414 if (aligned) { 1415 // 'aligned' == true when it is known statically during compilation 1416 // of this arraycopy call site that both 'from' and 'to' addresses 1417 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 1418 // 1419 // Aligned arrays have 4-byte alignment in the 32-bit VM 1420 // and 8-byte alignment in the 64-bit VM. 1421 // 1422 } else { 1423 // copy 1 element if necessary to align 'to' on a 4-byte boundary 1424 __ andcc(to, 3, G0); 1425 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1426 __ delayed()->lduh(from, 0, O3); 1427 __ inc(from, 2); 1428 __ inc(to, 2); 1429 __ dec(count); 1430 __ sth(O3, to, -2); 1431 __ BIND(L_skip_alignment); 1432 1433 // copy 2 elements to align 'to' on an 8 byte boundary 1434 __ andcc(to, 7, G0); 1435 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); 1436 __ delayed()->lduh(from, 0, O3); 1437 __ dec(count, 2); 1438 __ lduh(from, 2, O4); 1439 __ inc(from, 4); 1440 __ inc(to, 4); 1441 __ sth(O3, to, -4); 1442 __ sth(O4, to, -2); 1443 __ BIND(L_skip_alignment2); 1444 } 1445 if (!aligned) 1446 { 1447 // Copy with shift 16 bytes per iteration if arrays do not have 1448 // the same alignment mod 8, otherwise fall through to the next 1449 // code for aligned copy. 1450 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1451 // Also jump over the aligned copy after the copy with shift has completed.
1452 1453 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); 1454 } 1455 1456 // Both arrays are 8-byte aligned; copy 16 bytes at a time 1457 __ and3(count, 3, G4); // Save 1458 __ srl(count, 2, count); 1459 generate_disjoint_long_copy_core(aligned); 1460 __ mov(G4, count); // restore 1461 1462 // copy 1 element at a time 1463 __ BIND(L_copy_2_bytes); 1464 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); 1465 __ align(OptoLoopAlignment); 1542 // align destination address at a 4-byte boundary 1543 if (t == T_BYTE) { 1544 // One byte misalignment happens only for byte arrays 1545 __ andcc(to, 1, G0); 1546 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); 1547 __ delayed()->nop(); 1548 __ stb(value, to, 0); 1549 __ inc(to, 1); 1550 __ dec(count, 1); 1551 __ BIND(L_skip_align1); 1552 } 1553 // Two bytes misalignment happens only for byte and short (char) arrays 1554 __ andcc(to, 2, G0); 1555 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); 1556 __ delayed()->nop(); 1557 __ sth(value, to, 0); 1558 __ inc(to, 2); 1559 __ dec(count, 1 << (shift - 1)); 1560 __ BIND(L_skip_align2); 1561 } 1562 if (!aligned) { 1563 // align to 8 bytes, we know we are 4 byte aligned to start 1564 __ andcc(to, 7, G0); 1565 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes); 1566 __ delayed()->nop(); 1567 __ stw(value, to, 0); 1568 __ inc(to, 4); 1569 __ dec(count, 1 << shift); 1570 __ BIND(L_fill_32_bytes); 1571 } 1572 1573 if (t == T_INT) { 1574 // Zero extend value 1575 __ srl(value, 0, value); 1576 } 1577 if (t == T_BYTE || t == T_SHORT || t == T_INT) { 1578 __ sllx(value, 32, O3); 1579 __ or3(value, O3, value); 1580 } 1581 1582 Label L_check_fill_8_bytes; 1583 // Fill 32-byte chunks 1584 __ subcc(count, 8 << shift, count); 1585 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); 1586 __ delayed()->nop(); 1587 1588 Label L_fill_32_bytes_loop, L_fill_4_bytes; 1589 __ align(16); 1590 __ BIND(L_fill_32_bytes_loop); 1591 1752 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1753 __ delayed()->lduh(end_from, -2, O3); 1754 __ dec(end_from, 2); 1755 __ dec(end_to, 2); 1756 __ dec(count); 1757 __ sth(O3, end_to, 0); 1758 __ BIND(L_skip_alignment); 1759 1760 // copy 2 elements to align 'end_to' on an 8 byte boundary 1761 __ andcc(end_to, 7, G0); 1762 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); 1763 __ delayed()->lduh(end_from, -2, O3); 1764 __ dec(count, 2); 1765 __ lduh(end_from, -4, O4); 1766 __ dec(end_from, 4); 1767 __ dec(end_to, 4); 1768 __ sth(O3, end_to, 2); 1769 __ sth(O4, end_to, 0); 1770 __ BIND(L_skip_alignment2); 1771 } 1772 if (aligned) { 1773 // Both arrays are aligned to 8 bytes in the 64-bit VM. 1774 // The 'count' is decremented in copy_16_bytes_backward_with_shift() 1775 // in unaligned case. 1776 __ dec(count, 8); 1777 } else 1778 { 1779 // Copy with shift 16 bytes per iteration if arrays do not have 1780 // the same alignment mod 8, otherwise jump to the next 1781 // code for aligned copy (and subtracting 8 from 'count' before jump). 1782 // The compare above (count >= 11) guarantees 'count' >= 16 bytes. 1783 // Also jump over the aligned copy after the copy with shift has completed.
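// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): why the conjoint
// stubs, like the short copy below, run from 'end_from'/'end_to' downwards.
// If the regions overlap with 'to' above 'from', a forward copy would
// overwrite source elements before reading them; a descending copy is safe.
// The helper name is hypothetical.
#include <cstddef>
#include <cstdint>

static void copy_shorts_backward_sketch(const uint16_t* from, uint16_t* to,
                                        size_t count) {
  const uint16_t* end_from = from + count;  // 'end_from' in the stub
  uint16_t*       end_to   = to + count;    // 'end_to' in the stub
  while (count-- > 0) {
    *--end_to = *--end_from;                // copy highest element first
  }
}
// ---------------------------------------------------------------------------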
1784 1785 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, 1786 L_aligned_copy, L_copy_2_bytes); 1787 } 1788 // copy 4 elements (16 bytes) at a time 1789 __ align(OptoLoopAlignment); 1790 __ BIND(L_aligned_copy); 1791 __ dec(end_from, 16); 1792 __ ldx(end_from, 8, O3); 1793 __ ldx(end_from, 0, O4); 1794 __ dec(end_to, 16); 1795 __ deccc(count, 8); 1796 __ stx(O3, end_to, 8); 1797 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); 1867 // count: O2 treated as signed 1868 // 1869 void generate_disjoint_int_copy_core(bool aligned) { 1870 1871 Label L_skip_alignment, L_aligned_copy; 1872 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit; 1873 1874 const Register from = O0; // source array address 1875 const Register to = O1; // destination array address 1876 const Register count = O2; // elements count 1877 const Register offset = O5; // offset from start of arrays 1878 // O3, O4, G3, G4 are used as temp registers 1879 1880 // 'aligned' == true when it is known statically during compilation 1881 // of this arraycopy call site that both 'from' and 'to' addresses 1882 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). 1883 // 1884 // Aligned arrays have 4-byte alignment in the 32-bit VM 1885 // and 8-byte alignment in the 64-bit VM. 1886 // 1887 if (!aligned) 1888 { 1889 // The next check could be put under 'ifndef' since the code in 1890 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'. 1891 1892 // for short arrays, just do single element copy 1893 __ cmp(count, 5); // 4 + 1 (20 bytes) 1894 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes); 1895 __ delayed()->mov(G0, offset); 1896 1897 // copy 1 element to align 'to' on an 8 byte boundary 1898 __ andcc(to, 7, G0); 1899 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); 1900 __ delayed()->ld(from, 0, O3); 1901 __ inc(from, 4); 1902 __ inc(to, 4); 1903 __ dec(count); 1904 __ st(O3, to, -4); 1905 __ BIND(L_skip_alignment); 1906 1907 // if arrays have same alignment mod 8, do 4 elements copy 2354 const Register from = O0; // source array address 2355 const Register to = O1; // destination array address 2356 const Register count = O2; // elements count 2357 2358 __ align(CodeEntryAlignment); 2359 StubCodeMark mark(this, "StubRoutines", name); 2360 address start = __ pc(); 2361 2362 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2363 2364 if (entry != NULL) { 2365 *entry = __ pc(); 2366 // caller can pass a 64-bit byte count here 2367 BLOCK_COMMENT("Entry:"); 2368 } 2369 2370 // save arguments for barrier generation 2371 __ mov(to, G1); 2372 __ mov(count, G5); 2373 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2374 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2375 if (UseCompressedOops) { 2376 generate_disjoint_int_copy_core(aligned); 2377 } else { 2378 generate_disjoint_long_copy_core(aligned); 2379 } 2380 // O0 is used as temp register 2381 gen_write_ref_array_post_barrier(G1, G5, O0); 2382 2383 // O3, O4 are used as temp registers 2384 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2385 __ retl(); 2386 __ delayed()->mov(G0, O0); // return 0 2387 return start; 2388 } 2389 2390 // Generate stub for conjoint oop copy. If "aligned" is true, the 2391 // "from" and "to" addresses are assumed to be heapword aligned.
2392 // 2393 // Arguments for generated stub: 2394 // from: O0 2395 // to: O1 2396 // count: O2 treated as signed 2397 // 2398 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, 2399 address *entry, const char *name, 2405 2406 __ align(CodeEntryAlignment); 2407 StubCodeMark mark(this, "StubRoutines", name); 2408 address start = __ pc(); 2409 2410 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2411 2412 if (entry != NULL) { 2413 *entry = __ pc(); 2414 // caller can pass a 64-bit byte count here 2415 BLOCK_COMMENT("Entry:"); 2416 } 2417 2418 array_overlap_test(nooverlap_target, LogBytesPerHeapOop); 2419 2420 // save arguments for barrier generation 2421 __ mov(to, G1); 2422 __ mov(count, G5); 2423 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized); 2424 2425 if (UseCompressedOops) { 2426 generate_conjoint_int_copy_core(aligned); 2427 } else { 2428 generate_conjoint_long_copy_core(aligned); 2429 } 2430 2431 // O0 is used as temp register 2432 gen_write_ref_array_post_barrier(G1, G5, O0); 2433 2434 // O3, O4 are used as temp registers 2435 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4); 2436 __ retl(); 2437 __ delayed()->mov(G0, O0); // return 0 2438 return start; 2439 } 2440 2441 2442 // Helper for generating a dynamic type check. 2443 // Smashes only the given temp registers. 2444 void generate_type_check(Register sub_klass, 2445 Register super_check_offset, 2446 Register super_klass, 2447 Register temp, 2448 Label& L_success) { 2449 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); 3021 "arrayof_jbyte_arraycopy"); 3022 3023 //*** jshort 3024 // Always need aligned and unaligned versions 3025 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, 3026 "jshort_disjoint_arraycopy"); 3027 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, 3028 &entry_jshort_arraycopy, 3029 "jshort_arraycopy"); 3030 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, 3031 "arrayof_jshort_disjoint_arraycopy"); 3032 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, 3033 "arrayof_jshort_arraycopy"); 3034 3035 //*** jint 3036 // Aligned versions 3037 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, 3038 "arrayof_jint_disjoint_arraycopy"); 3039 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, 3040 "arrayof_jint_arraycopy"); 3041 // In 64 bit we need both aligned and unaligned versions of jint arraycopy. 3042 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it). 
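// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): how a caller could
// choose between the aligned ('arrayof_') and unaligned jint entries that
// are registered below. The aligned stubs may assume both addresses are
// HeapWordSize aligned; everything here (names, the 8-byte constant) is a
// hypothetical illustration of that dispatch, not VM code.
#include <cstdint>

typedef void (*copy_stub_t)(const void* from, void* to, intptr_t count);

static copy_stub_t pick_jint_stub_sketch(const void* from, void* to,
                                         copy_stub_t aligned_stub,
                                         copy_stub_t unaligned_stub) {
  uintptr_t bits = (uintptr_t)from | (uintptr_t)to;
  return (bits & 7) == 0 ? aligned_stub : unaligned_stub;  // HeapWordSize == 8
}
// ---------------------------------------------------------------------------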
3043 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, 3044 "jint_disjoint_arraycopy"); 3045 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, 3046 &entry_jint_arraycopy, 3047 "jint_arraycopy"); 3048 3049 //*** jlong 3050 // It is always aligned 3051 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, 3052 "arrayof_jlong_disjoint_arraycopy"); 3053 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, 3054 "arrayof_jlong_arraycopy"); 3055 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; 3056 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; 3057 3058 3059 //*** oops 3060 // Aligned versions 3061 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry, 3062 "arrayof_oop_disjoint_arraycopy"); 3063 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy, 3064 "arrayof_oop_arraycopy"); 3065 // Aligned versions without pre-barriers 3066 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry, 3067 "arrayof_oop_disjoint_arraycopy_uninit", 3068 /*dest_uninitialized*/true); 3069 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL, 3070 "arrayof_oop_arraycopy_uninit", 3071 /*dest_uninitialized*/true); 3072 if (UseCompressedOops) { 3073 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy. 3074 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry, 3075 "oop_disjoint_arraycopy"); 3076 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy, 3077 "oop_arraycopy"); 3078 // Unaligned versions without pre-barriers 3079 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry, 3080 "oop_disjoint_arraycopy_uninit", 3081 /*dest_uninitialized*/true); 3082 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL, 3083 "oop_arraycopy_uninit", 3084 /*dest_uninitialized*/true); 3085 } else 3086 { 3087 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops 3088 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; 3089 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; 3090 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; 3091 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; 3092 } 3093 3094 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 3095 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, 3096 /*dest_uninitialized*/true); 3097 3098 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 3099 entry_jbyte_arraycopy, 3100 entry_jshort_arraycopy, 3101 entry_jint_arraycopy, 3102 entry_jlong_arraycopy); 3103 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", 3104 entry_jbyte_arraycopy, 3105 entry_jshort_arraycopy, 4976 4977 void generate_initial() { 4978 // Generates all stubs and initializes the entry points 4979 4980 //------------------------------------------------------------------------------------------------------------------------ 4981 // entry points that exist in 
all platforms 4982 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than 4983 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. 4984 StubRoutines::_forward_exception_entry = generate_forward_exception(); 4985 4986 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); 4987 StubRoutines::_catch_exception_entry = generate_catch_exception(); 4988 4989 //------------------------------------------------------------------------------------------------------------------------ 4990 // entry points that are platform specific 4991 StubRoutines::Sparc::_test_stop_entry = generate_test_stop(); 4992 4993 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine(); 4994 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows(); 4995 4996 // Build this early so it's available for the interpreter. 4997 StubRoutines::_throw_StackOverflowError_entry = 4998 generate_throw_exception("StackOverflowError throw_exception", 4999 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); 5000 StubRoutines::_throw_delayed_StackOverflowError_entry = 5001 generate_throw_exception("delayed StackOverflowError throw_exception", 5002 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError)); 5003 5004 if (UseCRC32Intrinsics) { 5005 // set table address before stub generation, which uses it 5006 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table; 5007 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); 5008 } 5009 5010 if (UseCRC32CIntrinsics) { 5011 // set table address before stub generation, which uses it 5012 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table; 5013 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); 5014 } 5015 } 5083 _stub_count = !all ? 0x100 : 0x200; 5084 if (all) { 5085 generate_all(); 5086 } else { 5087 generate_initial(); 5088 } 5089 5090 // make sure this stub is available for all local calls 5091 if (_atomic_add_stub.is_unbound()) { 5092 // generate a second time, if necessary 5093 (void) generate_atomic_add(); 5094 } 5095 } 5096 5097 5098 private: 5099 int _stub_count; 5100 void stub_prolog(StubCodeDesc* cdesc) { 5101 # ifdef ASSERT 5102 // put extra information in the stub code, to make it more readable 5103 // Write the high part of the address 5104 // [RGV] Check if there is a dependency on the size of this prolog 5105 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none); 5106 __ emit_data((intptr_t)cdesc, relocInfo::none); 5107 __ emit_data(++_stub_count, relocInfo::none); 5108 # endif 5109 align(true); 5110 } 5111 5112 void align(bool at_header = false) { 5113 // %%%%% move this constant somewhere else 5114 // UltraSPARC cache line size is 8 instructions: 5115 const unsigned int icache_line_size = 32; 5116 const unsigned int icache_half_line_size = 16; 5117 5118 if (at_header) { 5119 while ((intptr_t)(__ pc()) % icache_line_size != 0) { 5120 __ emit_data(0, relocInfo::none); 5121 } 5122 } else { 5123 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { 5124 __ nop(); 5125 }
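// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the padding that
// align() above performs, expressed as a count of 4-byte filler words (nops
// or emit_data(0)) needed to reach the next cache-line or half-line
// boundary. The helper name is hypothetical.
#include <cstdint>

static unsigned padding_words_sketch(uintptr_t pc, unsigned boundary) {
  unsigned mis = (unsigned)(pc % boundary);
  return mis == 0 ? 0 : (boundary - mis) / 4;  // SPARC instructions are 4 bytes
}
// ---------------------------------------------------------------------------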