src/cpu/sparc/vm/stubGenerator_sparc.cpp


 199       __ br(Assembler::greater, false, Assembler::pt, loop);
 200       __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
 201 
 202       // done
 203       __ BIND(exit);
 204     }
 205 
 206     // setup parameters, method & call Java function
 207 #ifdef ASSERT
 208     // layout_activation_impl checks its notion of saved SP against
 209     // this register, so if this changes, update it as well.
 210     const Register saved_SP = Lscratch;
 211     __ mov(SP, saved_SP);                               // keep track of SP before call
 212 #endif
 213 
 214     // setup parameters
 215     const Register t = G3_scratch;
 216     __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
 217     __ sll(t, Interpreter::logStackElementSize, t);    // compute number of bytes
 218     __ sub(FP, t, Gargs);                              // setup parameter pointer
 219 #ifdef _LP64
 220     __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias
 221 #endif
 222     __ mov(SP, O5_savedSP);
 223 
 224 
 225     // do the call
 226     //
 227     // the following registers must be set up:
 228     //
 229     // G2_thread
 230     // G5_method
 231     // Gargs
 232     BLOCK_COMMENT("call Java function");
 233     __ jmpl(entry_point.as_in().as_register(), G0, O7);
 234     __ delayed()->mov(method.as_in().as_register(), G5_method);   // setup method
 235 
 236     BLOCK_COMMENT("call_stub_return_address:");
 237     return_pc = __ pc();
 238 
 239     // The callee, if it wasn't interpreted, can return with SP changed, so
 240     // we can no longer assert that SP is unchanged after the call.
 241 
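
A note on the Gargs computation above, for readers not fluent in SPARC assembly: the parameter word count is scaled to bytes, subtracted from FP, and (since SPARC V9 frame registers hold biased values) the stack bias is added back to obtain a usable address. A standalone C++ sketch of that arithmetic; the 8-byte stack element size and the 2047-byte bias are assumed values for illustration, not read from this file:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed for illustration: 8-byte interpreter stack slots and the
  // SPARC V9 2047-byte stack bias; the real constants come from the
  // interpreter and platform headers.
  const int     stack_element_size = 8;
  const int64_t STACK_BIAS         = 2047;

  int64_t FP          = 0x7ffffff000LL;  // pretend (biased) frame pointer value
  int     param_words = 3;               // the value loaded into 't' above

  int64_t param_bytes = (int64_t)param_words * stack_element_size; // sll by logStackElementSize
  int64_t Gargs       = FP - param_bytes;                          // sub(FP, t, Gargs)
  Gargs += STACK_BIAS;                   // add(Gargs, STACK_BIAS, Gargs): undo the register bias

  std::printf("Gargs = %#llx\n", (unsigned long long)Gargs);
  return 0;
}
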


 254       // store int result
 255       __ st(O0, addr, G0);
 256 
 257       __ BIND(exit);
 258       __ ret();
 259       __ delayed()->restore();
 260 
 261       __ BIND(is_object);
 262       __ ba(exit);
 263       __ delayed()->st_ptr(O0, addr, G0);
 264 
 265       __ BIND(is_float);
 266       __ ba(exit);
 267       __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
 268 
 269       __ BIND(is_double);
 270       __ ba(exit);
 271       __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
 272 
 273       __ BIND(is_long);
 274 #ifdef _LP64
 275       __ ba(exit);
 276       __ delayed()->st_long(O0, addr, G0);      // store entire long
 277 #else
 278 #if defined(COMPILER2)
 279   // All return values are where we want them, except for Longs.  C2 returns
 280   // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
 281   // Since the interpreter will return longs in G1 and in O0/O1 in the 32-bit
 282   // build, we simply always use G1.
 283   // Note: I tried to make C2 return longs in O0/O1 and G1 so we wouldn't have to
 284   // do this here. Unfortunately, if we did a rethrow we'd see a MachEpilog node
 285   // first, which would move G1 -> O0/O1 and destroy the exception we were throwing.
 286 
 287       __ ba(exit);
 288       __ delayed()->stx(G1, addr, G0);  // store entire long
 289 #else
 290       __ st(O1, addr, BytesPerInt);
 291       __ ba(exit);
 292       __ delayed()->st(O0, addr, G0);
 293 #endif /* COMPILER2 */
 294 #endif /* _LP64 */
 295      }
 296      return start;
 297   }
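
The result-storing branches above amount to a switch on the Java result type, writing O0, the long value, or F0 into the caller-supplied result slot. A hedged C++ sketch of that dispatch; the ResultKind enum and store_result name are illustrative stand-ins, not the VM's BasicType machinery:

#include <cstdint>

enum ResultKind { R_INT, R_OBJECT, R_FLOAT, R_DOUBLE, R_LONG };  // illustrative, not BasicType

// Mirrors the is_object / is_float / is_double / is_long branches: write the
// raw result into the caller's result slot with the width the type demands.
void store_result(void* addr, ResultKind kind, int64_t int_result, double fp_result) {
  switch (kind) {
    case R_INT:    *(int32_t*) addr = (int32_t)int_result;  break;  // st     O0
    case R_OBJECT: *(intptr_t*)addr = (intptr_t)int_result; break;  // st_ptr  O0
    case R_FLOAT:  *(float*)   addr = (float)fp_result;     break;  // stf F0 (single)
    case R_DOUBLE: *(double*)  addr = fp_result;            break;  // stf F0 (double)
    case R_LONG:   *(int64_t*) addr = int_result;           break;  // st_long
  }
}

int main() {
  int64_t slot = 0;
  store_result(&slot, R_LONG, 42, 0.0);
  return slot == 42 ? 0 : 1;
}
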
 298 
 299 
 300   //----------------------------------------------------------------------------------------------------
 301   // Return point for a Java call if there's an exception thrown in Java code.
 302   // The exception is caught and transformed into a pending exception stored in
 303   // JavaThread that can be tested from within the VM.
 304   //
 305   // Oexception: exception oop
 306 
 307   address generate_catch_exception() {
 308     StubCodeMark mark(this, "StubRoutines", "catch_exception");
 309 
 310     address start = __ pc();
 311     // verify that thread corresponds
 312     __ verify_thread();
 313 
 314     const Register& temp_reg = Gtemp;


 729 
 730     return start;
 731   }
 732   Label _atomic_add_stub;  // called from other stubs
 733 
 734 
 735   // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
 736   // Arguments :
 737   //
 738   //      ret  : O0, returned
 739   //      icc/xcc: set according to O0 (icc or xcc depending on wordSize)
 740   //      sub  : O1, argument, not changed
 741   //      super: O2, argument, not changed
 742   //      raddr: O7, blown by call
 743   address generate_partial_subtype_check() {
 744     __ align(CodeEntryAlignment);
 745     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
 746     address start = __ pc();
 747     Label miss;
 748 
 749 #if defined(COMPILER2) && !defined(_LP64)
 750     // Do not use a 'save' because it blows the 64-bit O registers.
 751     __ add(SP,-4*wordSize,SP);  // Make space for 4 temps (stack must be 2 words aligned)
 752     __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
 753     __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
 754     __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
 755     __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
 756     Register Rret   = O0;
 757     Register Rsub   = O1;
 758     Register Rsuper = O2;
 759 #else
 760     __ save_frame(0);
 761     Register Rret   = I0;
 762     Register Rsub   = I1;
 763     Register Rsuper = I2;
 764 #endif
 765 
 766     Register L0_ary_len = L0;
 767     Register L1_ary_ptr = L1;
 768     Register L2_super   = L2;
 769     Register L3_index   = L3;
 770 
 771     __ check_klass_subtype_slow_path(Rsub, Rsuper,
 772                                      L0, L1, L2, L3,
 773                                      NULL, &miss);
 774 
 775     // Match falls through here.
 776     __ addcc(G0,0,Rret);        // set Z flags, Z result
 777 
 778 #if defined(COMPILER2) && !defined(_LP64)
 779     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
 780     __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
 781     __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
 782     __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
 783     __ retl();                  // Result in Rret is zero; flags set to Z
 784     __ delayed()->add(SP,4*wordSize,SP);
 785 #else
 786     __ ret();                   // Result in Rret is zero; flags set to Z
 787     __ delayed()->restore();
 788 #endif
 789 
 790     __ BIND(miss);
 791     __ addcc(G0,1,Rret);        // set NZ flags, NZ result
 792 
 793 #if defined(COMPILER2) && !defined(_LP64)
 794     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
 795     __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
 796     __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
 797     __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
 798     __ retl();                  // Result in Rret is != 0; flags set to NZ
 799     __ delayed()->add(SP,4*wordSize,SP);
 800 #else
 801     __ ret();                   // Result in Rret is != 0; flags set to NZ
 802     __ delayed()->restore();
 803 #endif
 804 
 805     return start;
 806   }
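
Per the header comment, the generated routine reports its answer twice: O0 is 0 (with the condition codes at Z) on a subtype match, and non-zero (NZ) on a miss, so callers can branch directly on the flags. A caller-side C++ sketch of that contract; partial_subtype_check_stub is a placeholder for the generated code and its body is deliberately trivial:

#include <cstdio>

// Placeholder for the generated routine: 0 means 'sub' is a subtype of
// 'super' (the real stub also leaves Z set), non-zero means a miss (NZ set).
// The real code walks the secondary supers via check_klass_subtype_slow_path.
unsigned partial_subtype_check_stub(const void* sub, const void* super) {
  return (sub == super) ? 0u : 1u;
}

int main() {
  int klassA = 0, klassB = 0;
  if (partial_subtype_check_stub(&klassA, &klassA) == 0) {
    std::puts("match: O0 == 0, Z flag set");
  }
  if (partial_subtype_check_stub(&klassA, &klassB) != 0) {
    std::puts("miss:  O0 != 0, NZ flags set");
  }
  return 0;
}
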
 807 
 808 
 809   // Called from MacroAssembler::verify_oop
 810   //
 811   address generate_verify_oop_subroutine() {
 812     StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
 813 
 814     address start = __ pc();
 815 
 816     __ verify_oop_subroutine();
 817 
 818     return start;
 819   }
 820 
 821 
 822   //
 823   // Verify that a register contains a clean 32-bit positive value
 824   // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
 825   //
 826   //  Input:
 827   //    Rint  -  32-bits value
 828   //    Rtmp  -  scratch
 829   //
 830   void assert_clean_int(Register Rint, Register Rtmp) {
 831 #if defined(ASSERT) && defined(_LP64)
 832     __ signx(Rint, Rtmp);
 833     __ cmp(Rint, Rtmp);
 834     __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
 835 #endif
 836   }
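
The signx/cmp/trap sequence above asserts that the 64-bit register already equals the sign extension of its own low 32 bits, i.e. no stale high bits are present before the value feeds a 64-bit shift. A minimal C++ sketch of the same invariant:

#include <cassert>
#include <cstdint>

// Mirrors signx + cmp + trap-if-not-equal: the register must already equal
// the sign extension of its own low 32 bits, so the high half carries no
// stale data.
void assert_clean_int(int64_t r) {
  int64_t sign_extended = (int64_t)(int32_t)r;   // 'signx'
  assert(r == sign_extended && "high 32 bits are stale");
}

int main() {
  assert_clean_int(7);             // fine: high bits are zero
  // assert_clean_int(1LL << 40);  // would fire: stale high bits
  return 0;
}
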
 837 
 838   //
 839   //  Generate overlap test for array copy stubs
 840   //
 841   //  Input:
 842   //    O0    -  array1
 843   //    O1    -  array2
 844   //    O2    -  element count
 845   //
 846   //  Kills temps:  O3, O4
 847   //
 848   void array_overlap_test(address no_overlap_target, int log2_elem_size) {
 849     assert(no_overlap_target != NULL, "must be generated");
 850     array_overlap_test(no_overlap_target, NULL, log2_elem_size);
 851   }
 852   void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
 853     array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
 854   }
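
The overloads here all funnel into the usual unsigned-distance overlap test: a forward copy is safe exactly when the destination does not start inside the source range being copied. A hedged C++ sketch of that decision (the helper name is illustrative, not the stub's):

#include <cstddef>
#include <cstdint>

// A forward (disjoint-style) copy is safe when 'to' does not land inside
// [from, from + (count << log2_elem_size)).  Unsigned arithmetic makes
// "to below from" wrap around and fall into the no-overlap branch as well.
bool no_forward_overlap(const void* from, const void* to,
                        size_t count, int log2_elem_size) {
  uintptr_t byte_count = (uintptr_t)count << log2_elem_size;
  uintptr_t distance   = (uintptr_t)to - (uintptr_t)from;
  return distance >= byte_count;   // otherwise the stub copies backwards
}

int main() {
  char buf[16];
  // Destination starts 8 bytes past the source; copying 4 bytes is safe.
  return no_forward_overlap(buf, buf + 8, 4, 0) ? 0 : 1;
}
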
 855   void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {


1252 
1253     if (entry != NULL) {
1254       *entry = __ pc();
1255       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1256       BLOCK_COMMENT("Entry:");
1257     }
1258 
1259     // for short arrays, just do single element copy
1260     __ cmp(count, 23); // 16 + 7
1261     __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1262     __ delayed()->mov(G0, offset);
1263 
1264     if (aligned) {
1265       // 'aligned' == true when it is known statically during compilation
1266       // of this arraycopy call site that both 'from' and 'to' addresses
1267       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1268       //
1269       // Aligned arrays have 4-byte alignment in the 32-bit VM
1270       // and 8-byte alignment in the 64-bit VM, so we do this only in the 32-bit VM.
1271       //
1272 #ifndef _LP64
1273       // copy a 4-byte word if necessary to align 'to' to 8 bytes
1274       __ andcc(to, 7, G0);
1275       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
1276       __ delayed()->ld(from, 0, O3);
1277       __ inc(from, 4);
1278       __ inc(to, 4);
1279       __ dec(count, 4);
1280       __ st(O3, to, -4);
1281     __ BIND(L_skip_alignment);
1282 #endif
1283     } else {
1284       // copy bytes to align 'to' on 8 byte boundary
1285       __ andcc(to, 7, G1); // misaligned bytes
1286       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1287       __ delayed()->neg(G1);
1288       __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
1289       __ sub(count, G1, count);
1290     __ BIND(L_align);
1291       __ ldub(from, 0, O3);
1292       __ deccc(G1);
1293       __ inc(from);
1294       __ stb(O3, to, 0);
1295       __ br(Assembler::notZero, false, Assembler::pt, L_align);
1296       __ delayed()->inc(to);
1297     __ BIND(L_skip_alignment);
1298     }
1299 #ifdef _LP64
1300     if (!aligned)
1301 #endif
1302     {
1303       // Copy with shift 16 bytes per iteration if arrays do not have
1304       // the same alignment mod 8, otherwise fall through to the next
1305       // code for aligned copy.
1306       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1307       // Also jump over aligned copy after the copy with shift completed.
1308 
1309       copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1310     }
1311 
1312     // Both arrays are 8-byte aligned; copy 16 bytes at a time
1313       __ and3(count, 7, G4); // Save count
1314       __ srl(count, 3, count);
1315      generate_disjoint_long_copy_core(aligned);
1316       __ mov(G4, count);     // Restore count
1317 
1318     // copy trailing bytes
1319     __ BIND(L_copy_byte);
1320       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1321       __ align(OptoLoopAlignment);
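
copy_16_bytes_forward_with_shift, called above, handles sources and destinations that disagree modulo 8 by reading aligned 8-byte words and reassembling each destination word from two neighbours with a shift pair. A simplified C++ sketch of that merge step, assuming big-endian byte order as on SPARC; it illustrates the technique, not the stub's exact code:

#include <cassert>
#include <cstdint>

// 'offset' (1..7) is how far the source lags an 8-byte boundary relative to
// the destination.  Two aligned 8-byte source words are combined with a
// shift pair into one aligned destination word.
uint64_t merge_shifted(uint64_t first_word, uint64_t second_word, unsigned offset) {
  assert(offset >= 1 && offset <= 7);
  unsigned lshift = offset * 8;
  unsigned rshift = 64 - lshift;
  return (first_word << lshift) | (second_word >> rshift);
}

int main() {
  // Source bytes 0x01..0x10 seen as two big-endian words; reading them at a
  // misalignment of 3 bytes should produce bytes 0x04..0x0b.
  uint64_t w0 = 0x0102030405060708ULL;
  uint64_t w1 = 0x090a0b0c0d0e0f10ULL;
  return merge_shifted(w0, w1, 3) == 0x0405060708090a0bULL ? 0 : 1;
}
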


1378     __ delayed()->add(from, count, end_from);
1379 
1380     {
1381       // Align the ends of the arrays since they may not be aligned even
1382       // when the arrays themselves are aligned.
1383 
1384       // copy bytes to align 'end_to' on 8 byte boundary
1385       __ andcc(end_to, 7, G1); // misaligned bytes
1386       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1387       __ delayed()->nop();
1388       __ sub(count, G1, count);
1389     __ BIND(L_align);
1390       __ dec(end_from);
1391       __ dec(end_to);
1392       __ ldub(end_from, 0, O3);
1393       __ deccc(G1);
1394       __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1395       __ delayed()->stb(O3, end_to, 0);
1396     __ BIND(L_skip_alignment);
1397     }
1398 #ifdef _LP64
1399     if (aligned) {
1400       // Both arrays are aligned to 8-bytes in 64-bits VM.
1401       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1402       // in unaligned case.
1403       __ dec(count, 16);
1404     } else
1405 #endif
1406     {
1407       // Copy with shift 16 bytes per iteration if arrays do not have
1408       // the same alignment mod 8, otherwise jump to the next
1409       // code for aligned copy (and subtracting 16 from 'count' before jump).
1410       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1411       // Also jump over aligned copy after the copy with shift completed.
1412 
1413       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1414                                         L_aligned_copy, L_copy_byte);
1415     }
1416     // copy 4 elements (16 bytes) at a time
1417       __ align(OptoLoopAlignment);
1418     __ BIND(L_aligned_copy);
1419       __ dec(end_from, 16);
1420       __ ldx(end_from, 8, O3);
1421       __ ldx(end_from, 0, O4);
1422       __ dec(end_to, 16);
1423       __ deccc(count, 16);
1424       __ stx(O3, end_to, 8);
1425       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
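
The L_aligned_copy loop above is the backward block copy: both end pointers are pre-decremented by 16 and two 8-byte words are moved per iteration until fewer than 16 bytes remain. A plain C++ sketch of the same loop shape (tail handling omitted; memcpy stands in for ldx/stx):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Copy 'count' bytes ending at end_from over to the region ending at end_to,
// walking backwards 16 bytes per iteration (memcpy tolerates any alignment,
// unlike the real ldx/stx, which require 8-byte alignment).
void copy_backward_16(const uint8_t* end_from, uint8_t* end_to, ptrdiff_t count) {
  while (count >= 16) {
    end_from -= 16;
    end_to   -= 16;
    uint64_t hi, lo;
    std::memcpy(&hi, end_from + 8, 8);   // ldx [end_from + 8], O3
    std::memcpy(&lo, end_from + 0, 8);   // ldx [end_from + 0], O4
    std::memcpy(end_to + 8, &hi, 8);     // stx O3, [end_to + 8]
    std::memcpy(end_to + 0, &lo, 8);     // stx O4, [end_to + 0]
    count -= 16;
  }
  // The remaining 0..15 bytes are handled by the stub's tail code (not shown).
}

int main() {
  uint8_t buf[48];
  for (int i = 0; i < 32; i++) buf[i] = (uint8_t)i;
  copy_backward_16(buf + 32, buf + 48, 32);   // shift 32 overlapping bytes up by 16
  return buf[47] == 31 ? 0 : 1;
}
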


1473 
1474     if (entry != NULL) {
1475       *entry = __ pc();
1476       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1477       BLOCK_COMMENT("Entry:");
1478     }
1479 
1480     // for short arrays, just do single element copy
1481     __ cmp(count, 11); // 8 + 3  (22 bytes)
1482     __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1483     __ delayed()->mov(G0, offset);
1484 
1485     if (aligned) {
1486       // 'aligned' == true when it is known statically during compilation
1487       // of this arraycopy call site that both 'from' and 'to' addresses
1488       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1489       //
1490       // Aligned arrays have 4 bytes alignment in 32-bits VM
1491       // and 8 bytes - in 64-bits VM.
1492       //
1493 #ifndef _LP64
1494       // copy one 4-byte word (2 elements) if necessary to align 'to' to 8 bytes
1495       __ andcc(to, 7, G0);
1496       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1497       __ delayed()->ld(from, 0, O3);
1498       __ inc(from, 4);
1499       __ inc(to, 4);
1500       __ dec(count, 2);
1501       __ st(O3, to, -4);
1502     __ BIND(L_skip_alignment);
1503 #endif
1504     } else {
1505       // copy 1 element if necessary to align 'to' on a 4-byte boundary
1506       __ andcc(to, 3, G0);
1507       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1508       __ delayed()->lduh(from, 0, O3);
1509       __ inc(from, 2);
1510       __ inc(to, 2);
1511       __ dec(count);
1512       __ sth(O3, to, -2);
1513     __ BIND(L_skip_alignment);
1514 
1515       // copy 2 elements to align 'to' on an 8 byte boundary
1516       __ andcc(to, 7, G0);
1517       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1518       __ delayed()->lduh(from, 0, O3);
1519       __ dec(count, 2);
1520       __ lduh(from, 2, O4);
1521       __ inc(from, 4);
1522       __ inc(to, 4);
1523       __ sth(O3, to, -4);
1524       __ sth(O4, to, -2);
1525     __ BIND(L_skip_alignment2);
1526     }
1527 #ifdef _LP64
1528     if (!aligned)
1529 #endif
1530     {
1531       // Copy with shift 16 bytes per iteration if arrays do not have
1532       // the same alignment mod 8, otherwise fall through to the next
1533       // code for aligned copy.
1534       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1535       // Also jump over aligned copy after the copy with shift completed.
1536 
1537       copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1538     }
1539 
1540     // Both arrays are 8-byte aligned; copy 16 bytes at a time
1541       __ and3(count, 3, G4); // Save
1542       __ srl(count, 2, count);
1543      generate_disjoint_long_copy_core(aligned);
1544       __ mov(G4, count); // restore
1545 
1546     // copy 1 element at a time
1547     __ BIND(L_copy_2_bytes);
1548       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1549       __ align(OptoLoopAlignment);


1626       // align source address at 4 bytes address boundary
1627       if (t == T_BYTE) {
1628         // One-byte misalignment happens only for byte arrays
1629         __ andcc(to, 1, G0);
1630         __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1631         __ delayed()->nop();
1632         __ stb(value, to, 0);
1633         __ inc(to, 1);
1634         __ dec(count, 1);
1635         __ BIND(L_skip_align1);
1636       }
1637       // Two-byte misalignment happens only for byte and short (char) arrays
1638       __ andcc(to, 2, G0);
1639       __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1640       __ delayed()->nop();
1641       __ sth(value, to, 0);
1642       __ inc(to, 2);
1643       __ dec(count, 1 << (shift - 1));
1644       __ BIND(L_skip_align2);
1645     }
1646 #ifdef _LP64
1647     if (!aligned) {
1648 #endif
1649     // align to 8 bytes, we know we are 4 byte aligned to start
1650     __ andcc(to, 7, G0);
1651     __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1652     __ delayed()->nop();
1653     __ stw(value, to, 0);
1654     __ inc(to, 4);
1655     __ dec(count, 1 << shift);
1656     __ BIND(L_fill_32_bytes);
1657 #ifdef _LP64
1658     }
1659 #endif
1660 
1661     if (t == T_INT) {
1662       // Zero extend value
1663       __ srl(value, 0, value);
1664     }
1665     if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1666       __ sllx(value, 32, O3);
1667       __ or3(value, O3, value);
1668     }
1669 
1670     Label L_check_fill_8_bytes;
1671     // Fill 32-byte chunks
1672     __ subcc(count, 8 << shift, count);
1673     __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1674     __ delayed()->nop();
1675 
1676     Label L_fill_32_bytes_loop, L_fill_4_bytes;
1677     __ align(16);
1678     __ BIND(L_fill_32_bytes_loop);
1679 
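
The srl/sllx/or3 sequence above duplicates the 32-bit fill pattern into both halves of a 64-bit register (byte and short values are widened to 32 bits earlier in the stub, outside this hunk), so the main loop can store 8 bytes per instruction. A one-function C++ equivalent:

#include <cstdint>

// srl(value, 0, value) : zero-extend the 32-bit pattern
// sllx(value, 32, O3)  : copy it into the upper half
// or3(value, O3, value): both halves now hold the pattern
uint64_t widen_fill_pattern(uint32_t pattern32) {
  uint64_t low = pattern32;          // already zero-extended
  return low | (low << 32);
}

int main() {
  return widen_fill_pattern(0xDEADBEEFu) == 0xDEADBEEFDEADBEEFULL ? 0 : 1;
}
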


1840       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1841       __ delayed()->lduh(end_from, -2, O3);
1842       __ dec(end_from, 2);
1843       __ dec(end_to, 2);
1844       __ dec(count);
1845       __ sth(O3, end_to, 0);
1846     __ BIND(L_skip_alignment);
1847 
1848       // copy 2 elements to align 'end_to' on an 8 byte boundary
1849       __ andcc(end_to, 7, G0);
1850       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1851       __ delayed()->lduh(end_from, -2, O3);
1852       __ dec(count, 2);
1853       __ lduh(end_from, -4, O4);
1854       __ dec(end_from, 4);
1855       __ dec(end_to, 4);
1856       __ sth(O3, end_to, 2);
1857       __ sth(O4, end_to, 0);
1858     __ BIND(L_skip_alignment2);
1859     }
1860 #ifdef _LP64
1861     if (aligned) {
1862       // Both arrays are aligned to 8-bytes in 64-bits VM.
1863       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1864       // in unaligned case.
1865       __ dec(count, 8);
1866     } else
1867 #endif
1868     {
1869       // Copy with shift 16 bytes per iteration if arrays do not have
1870       // the same alignment mod 8, otherwise jump to the next
1871       // code for aligned copy (and subtracting 8 from 'count' before jump).
1872       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1873       // Also jump over aligned copy after the copy with shift completed.
1874 
1875       copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1876                                         L_aligned_copy, L_copy_2_bytes);
1877     }
1878     // copy 4 elements (16 bytes) at a time
1879       __ align(OptoLoopAlignment);
1880     __ BIND(L_aligned_copy);
1881       __ dec(end_from, 16);
1882       __ ldx(end_from, 8, O3);
1883       __ ldx(end_from, 0, O4);
1884       __ dec(end_to, 16);
1885       __ deccc(count, 8);
1886       __ stx(O3, end_to, 8);
1887       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);


1957   //      count: O2 treated as signed
1958   //
1959   void generate_disjoint_int_copy_core(bool aligned) {
1960 
1961     Label L_skip_alignment, L_aligned_copy;
1962     Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1963 
1964     const Register from      = O0;   // source array address
1965     const Register to        = O1;   // destination array address
1966     const Register count     = O2;   // elements count
1967     const Register offset    = O5;   // offset from start of arrays
1968     // O3, O4, G3, G4 are used as temp registers
1969 
1970     // 'aligned' == true when it is known statically during compilation
1971     // of this arraycopy call site that both 'from' and 'to' addresses
1972     // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1973     //
1974     // Aligned arrays have 4 bytes alignment in 32-bits VM
1975     // and 8 bytes - in 64-bits VM.
1976     //
1977 #ifdef _LP64
1978     if (!aligned)
1979 #endif
1980     {
1981       // The next check could be put under 'ifndef' since the code in
1982       // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1983 
1984       // for short arrays, just do single element copy
1985       __ cmp(count, 5); // 4 + 1 (20 bytes)
1986       __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1987       __ delayed()->mov(G0, offset);
1988 
1989       // copy 1 element to align 'to' on an 8 byte boundary
1990       __ andcc(to, 7, G0);
1991       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1992       __ delayed()->ld(from, 0, O3);
1993       __ inc(from, 4);
1994       __ inc(to, 4);
1995       __ dec(count);
1996       __ st(O3, to, -4);
1997     __ BIND(L_skip_alignment);
1998 
1999     // if arrays have the same alignment mod 8, do a 4-element copy


2446     const Register from  = O0;  // source array address
2447     const Register to    = O1;  // destination array address
2448     const Register count = O2;  // elements count
2449 
2450     __ align(CodeEntryAlignment);
2451     StubCodeMark mark(this, "StubRoutines", name);
2452     address start = __ pc();
2453 
2454     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
2455 
2456     if (entry != NULL) {
2457       *entry = __ pc();
2458       // caller can pass a 64-bit byte count here
2459       BLOCK_COMMENT("Entry:");
2460     }
2461 
2462     // save arguments for barrier generation
2463     __ mov(to, G1);
2464     __ mov(count, G5);
2465     gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2466   #ifdef _LP64
2467     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
2468     if (UseCompressedOops) {
2469       generate_disjoint_int_copy_core(aligned);
2470     } else {
2471       generate_disjoint_long_copy_core(aligned);
2472     }
2473   #else
2474     generate_disjoint_int_copy_core(aligned);
2475   #endif
2476     // O0 is used as temp register
2477     gen_write_ref_array_post_barrier(G1, G5, O0);
2478 
2479     // O3, O4 are used as temp registers
2480     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2481     __ retl();
2482     __ delayed()->mov(G0, O0); // return 0
2483     return start;
2484   }
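
Structurally, the oop copy stubs are: GC pre-barrier, element copy whose width follows UseCompressedOops (int core for narrow oops, long core for full-width oops), GC post-barrier, return 0. A schematic C++ sketch of that shape; every function here is a trivial stand-in for the generated stubs and barriers, chosen only to make the sketch compile:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Trivial stand-ins for the GC barriers and the two generated copy cores.
void pre_barrier(void*, std::size_t, bool)  {}
void post_barrier(void*, std::size_t)       {}
void copy_narrow_oops(const void* from, void* to, std::size_t count) {
  std::memcpy(to, from, count * 4);          // 4-byte compressed oops
}
void copy_wide_oops(const void* from, void* to, std::size_t count) {
  std::memcpy(to, from, count * 8);          // full-width oops
}

// Schematic shape of generate_disjoint_oop_copy: pre-barrier, element copy
// sized by UseCompressedOops, post-barrier, return 0 (as the stub does in O0).
int disjoint_oop_copy(const void* from, void* to, std::size_t count,
                      bool use_compressed_oops, bool dest_uninitialized) {
  pre_barrier(to, count, dest_uninitialized);
  if (use_compressed_oops) copy_narrow_oops(from, to, count);
  else                     copy_wide_oops(from, to, count);
  post_barrier(to, count);
  return 0;
}

int main() {
  std::int64_t src[2] = {1, 2}, dst[2] = {0, 0};
  return (disjoint_oop_copy(src, dst, 2, false, false) == 0 && dst[1] == 2) ? 0 : 1;
}
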
2485 
2486   //  Generate stub for conjoint oop copy.  If "aligned" is true, the
2487   //  "from" and "to" addresses are assumed to be heapword aligned.
2488   //
2489   // Arguments for generated stub:
2490   //      from:  O0
2491   //      to:    O1
2492   //      count: O2 treated as signed
2493   //
2494   address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2495                                      address *entry, const char *name,


2501 
2502     __ align(CodeEntryAlignment);
2503     StubCodeMark mark(this, "StubRoutines", name);
2504     address start = __ pc();
2505 
2506     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
2507 
2508     if (entry != NULL) {
2509       *entry = __ pc();
2510       // caller can pass a 64-bit byte count here
2511       BLOCK_COMMENT("Entry:");
2512     }
2513 
2514     array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2515 
2516     // save arguments for barrier generation
2517     __ mov(to, G1);
2518     __ mov(count, G5);
2519     gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2520 
2521   #ifdef _LP64
2522     if (UseCompressedOops) {
2523       generate_conjoint_int_copy_core(aligned);
2524     } else {
2525       generate_conjoint_long_copy_core(aligned);
2526     }
2527   #else
2528     generate_conjoint_int_copy_core(aligned);
2529   #endif
2530 
2531     // O0 is used as temp register
2532     gen_write_ref_array_post_barrier(G1, G5, O0);
2533 
2534     // O3, O4 are used as temp registers
2535     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2536     __ retl();
2537     __ delayed()->mov(G0, O0); // return 0
2538     return start;
2539   }
2540 
2541 
2542   // Helper for generating a dynamic type check.
2543   // Smashes only the given temp registers.
2544   void generate_type_check(Register sub_klass,
2545                            Register super_check_offset,
2546                            Register super_klass,
2547                            Register temp,
2548                            Label& L_success) {
2549     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);


3121                                                                                   "arrayof_jbyte_arraycopy");
3122 
3123     //*** jshort
3124     // Always need aligned and unaligned versions
3125     StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
3126                                                                                     "jshort_disjoint_arraycopy");
3127     StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
3128                                                                                     &entry_jshort_arraycopy,
3129                                                                                     "jshort_arraycopy");
3130     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3131                                                                                     "arrayof_jshort_disjoint_arraycopy");
3132     StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
3133                                                                                     "arrayof_jshort_arraycopy");
3134 
3135     //*** jint
3136     // Aligned versions
3137     StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3138                                                                                 "arrayof_jint_disjoint_arraycopy");
3139     StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3140                                                                                 "arrayof_jint_arraycopy");
3141 #ifdef _LP64
3142     // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3143     // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3144     StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
3145                                                                                 "jint_disjoint_arraycopy");
3146     StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
3147                                                                                 &entry_jint_arraycopy,
3148                                                                                 "jint_arraycopy");
3149 #else
3150     // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
3151     // (in fact in 32bit we always have a pre-loop part even in the aligned version,
3152     //  because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
3153     StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
3154     StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
3155 #endif
3156 
3157 
3158     //*** jlong
3159     // It is always aligned
3160     StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3161                                                                                   "arrayof_jlong_disjoint_arraycopy");
3162     StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3163                                                                                   "arrayof_jlong_arraycopy");
3164     StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3165     StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
3166 
3167 
3168     //*** oops
3169     // Aligned versions
3170     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, &entry,
3171                                                                                       "arrayof_oop_disjoint_arraycopy");
3172     StubRoutines::_arrayof_oop_arraycopy                 = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3173                                                                                       "arrayof_oop_arraycopy");
3174     // Aligned versions without pre-barriers
3175     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3176                                                                                       "arrayof_oop_disjoint_arraycopy_uninit",
3177                                                                                       /*dest_uninitialized*/true);
3178     StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
3179                                                                                       "arrayof_oop_arraycopy_uninit",
3180                                                                                       /*dest_uninitialized*/true);
3181 #ifdef _LP64
3182     if (UseCompressedOops) {
3183       // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
3184       StubRoutines::_oop_disjoint_arraycopy            = generate_disjoint_oop_copy(false, &entry,
3185                                                                                     "oop_disjoint_arraycopy");
3186       StubRoutines::_oop_arraycopy                     = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3187                                                                                     "oop_arraycopy");
3188       // Unaligned versions without pre-barriers
3189       StubRoutines::_oop_disjoint_arraycopy_uninit     = generate_disjoint_oop_copy(false, &entry,
3190                                                                                     "oop_disjoint_arraycopy_uninit",
3191                                                                                     /*dest_uninitialized*/true);
3192       StubRoutines::_oop_arraycopy_uninit              = generate_conjoint_oop_copy(false, entry, NULL,
3193                                                                                     "oop_arraycopy_uninit",
3194                                                                                     /*dest_uninitialized*/true);
3195     } else
3196 #endif
3197     {
3198       // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3199       StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3200       StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
3201       StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3202       StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
3203     }
3204 
3205     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3206     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3207                                                                         /*dest_uninitialized*/true);
3208 
3209     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
3210                                                               entry_jbyte_arraycopy,
3211                                                               entry_jshort_arraycopy,
3212                                                               entry_jint_arraycopy,
3213                                                               entry_jlong_arraycopy);
3214     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
3215                                                                entry_jbyte_arraycopy,
3216                                                                entry_jshort_arraycopy,


5087 
5088   void generate_initial() {
5089     // Generates all stubs and initializes the entry points
5090 
5091     //------------------------------------------------------------------------------------------------------------------------
5092     // entry points that exist in all platforms
5093     // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
5094     //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
5095     StubRoutines::_forward_exception_entry                 = generate_forward_exception();
5096 
5097     StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
5098     StubRoutines::_catch_exception_entry                   = generate_catch_exception();
5099 
5100     //------------------------------------------------------------------------------------------------------------------------
5101     // entry points that are platform specific
5102     StubRoutines::Sparc::_test_stop_entry                  = generate_test_stop();
5103 
5104     StubRoutines::Sparc::_stop_subroutine_entry            = generate_stop_subroutine();
5105     StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
5106 
5107 #if !defined(COMPILER2) && !defined(_LP64)
5108     StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
5109     StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
5110     StubRoutines::_atomic_add_entry          = generate_atomic_add();
5111     StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
5112     StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
5113     StubRoutines::_atomic_cmpxchg_byte_entry = ShouldNotCallThisStub();
5114     StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
5115     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
5116 #endif  // COMPILER2 !=> _LP64
5117 
5118     // Build this early so it's available for the interpreter.
5119     StubRoutines::_throw_StackOverflowError_entry =
5120             generate_throw_exception("StackOverflowError throw_exception",
5121             CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5122     StubRoutines::_throw_delayed_StackOverflowError_entry =
5123             generate_throw_exception("delayed StackOverflowError throw_exception",
5124             CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5125 
5126     if (UseCRC32Intrinsics) {
5127       // set table address before stub generation which uses it
5128       StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5129       StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5130     }
5131 
5132     if (UseCRC32CIntrinsics) {
5133       // set table address before stub generation which uses it
5134       StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5135       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5136     }
5137   }


5205     _stub_count = !all ? 0x100 : 0x200;
5206     if (all) {
5207       generate_all();
5208     } else {
5209       generate_initial();
5210     }
5211 
5212     // make sure this stub is available for all local calls
5213     if (_atomic_add_stub.is_unbound()) {
5214       // generate a second time, if necessary
5215       (void) generate_atomic_add();
5216     }
5217   }
5218 
5219 
5220  private:
5221   int _stub_count;
5222   void stub_prolog(StubCodeDesc* cdesc) {
5223     # ifdef ASSERT
5224       // put extra information in the stub code, to make it more readable
5225 #ifdef _LP64
5226 // Write the high part of the address
5227 // [RGV] Check if there is a dependency on the size of this prolog
5228       __ emit_data((intptr_t)cdesc >> 32,    relocInfo::none);
5229 #endif
5230       __ emit_data((intptr_t)cdesc,    relocInfo::none);
5231       __ emit_data(++_stub_count, relocInfo::none);
5232     # endif
5233     align(true);
5234   }
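
stub_prolog above tags the stub with its StubCodeDesc address and a counter, then calls align(true); the align() helper below pads with data words or nops up to an icache-line (32-byte) or half-line (16-byte) boundary. The pad count can also be computed arithmetically; a small C++ sketch, with the boundary values mirroring the constants below:

#include <cstdint>
#include <cstdio>

// Bytes of padding needed to bring 'pc' up to a multiple of 'boundary'
// (a power of two: 32 for a full UltraSPARC icache line, 16 for half a line).
uintptr_t padding_to(uintptr_t pc, uintptr_t boundary) {
  return (boundary - (pc & (boundary - 1))) & (boundary - 1);
}

int main() {
  std::printf("%lu\n", (unsigned long)padding_to(0x1004, 32));   // prints 28
  return 0;
}
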
5235 
5236   void align(bool at_header = false) {
5237     // %%%%% move this constant somewhere else
5238     // UltraSPARC cache line size is 8 instructions:
5239     const unsigned int icache_line_size = 32;
5240     const unsigned int icache_half_line_size = 16;
5241 
5242     if (at_header) {
5243       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5244         __ emit_data(0, relocInfo::none);
5245       }
5246     } else {
5247       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5248         __ nop();
5249       }


 199       __ br(Assembler::greater, false, Assembler::pt, loop);
 200       __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
 201 
 202       // done
 203       __ BIND(exit);
 204     }
 205 
 206     // setup parameters, method & call Java function
 207 #ifdef ASSERT
 208     // layout_activation_impl checks its notion of saved SP against
 209     // this register, so if this changes, update it as well.
 210     const Register saved_SP = Lscratch;
 211     __ mov(SP, saved_SP);                               // keep track of SP before call
 212 #endif
 213 
 214     // setup parameters
 215     const Register t = G3_scratch;
 216     __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
 217     __ sll(t, Interpreter::logStackElementSize, t);    // compute number of bytes
 218     __ sub(FP, t, Gargs);                              // setup parameter pointer

 219     __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias

 220     __ mov(SP, O5_savedSP);
 221 
 222 
 223     // do the call
 224     //
 225     // the following registers must be set up:
 226     //
 227     // G2_thread
 228     // G5_method
 229     // Gargs
 230     BLOCK_COMMENT("call Java function");
 231     __ jmpl(entry_point.as_in().as_register(), G0, O7);
 232     __ delayed()->mov(method.as_in().as_register(), G5_method);   // setup method
 233 
 234     BLOCK_COMMENT("call_stub_return_address:");
 235     return_pc = __ pc();
 236 
 237     // The callee, if it wasn't interpreted, can return with SP changed, so
 238     // we can no longer assert that SP is unchanged after the call.
 239 


 252       // store int result
 253       __ st(O0, addr, G0);
 254 
 255       __ BIND(exit);
 256       __ ret();
 257       __ delayed()->restore();
 258 
 259       __ BIND(is_object);
 260       __ ba(exit);
 261       __ delayed()->st_ptr(O0, addr, G0);
 262 
 263       __ BIND(is_float);
 264       __ ba(exit);
 265       __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
 266 
 267       __ BIND(is_double);
 268       __ ba(exit);
 269       __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
 270 
 271       __ BIND(is_long);

 272       __ ba(exit);
 273       __ delayed()->st_long(O0, addr, G0);      // store entire long


















 274      }
 275      return start;
 276   }
 277 
 278 
 279   //----------------------------------------------------------------------------------------------------
 280   // Return point for a Java call if there's an exception thrown in Java code.
 281   // The exception is caught and transformed into a pending exception stored in
 282   // JavaThread that can be tested from within the VM.
 283   //
 284   // Oexception: exception oop
 285 
 286   address generate_catch_exception() {
 287     StubCodeMark mark(this, "StubRoutines", "catch_exception");
 288 
 289     address start = __ pc();
 290     // verify that thread corresponds
 291     __ verify_thread();
 292 
 293     const Register& temp_reg = Gtemp;


 708 
 709     return start;
 710   }
 711   Label _atomic_add_stub;  // called from other stubs
 712 
 713 
 714   // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
 715   // Arguments :
 716   //
 717   //      ret  : O0, returned
 718   //      icc/xcc: set according to O0 (icc or xcc depending on wordSize)
 719   //      sub  : O1, argument, not changed
 720   //      super: O2, argument, not changed
 721   //      raddr: O7, blown by call
 722   address generate_partial_subtype_check() {
 723     __ align(CodeEntryAlignment);
 724     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
 725     address start = __ pc();
 726     Label miss;
 727 











 728     __ save_frame(0);
 729     Register Rret   = I0;
 730     Register Rsub   = I1;
 731     Register Rsuper = I2;

 732 
 733     Register L0_ary_len = L0;
 734     Register L1_ary_ptr = L1;
 735     Register L2_super   = L2;
 736     Register L3_index   = L3;
 737 
 738     __ check_klass_subtype_slow_path(Rsub, Rsuper,
 739                                      L0, L1, L2, L3,
 740                                      NULL, &miss);
 741 
 742     // Match falls through here.
 743     __ addcc(G0,0,Rret);        // set Z flags, Z result
 744 








 745     __ ret();                   // Result in Rret is zero; flags set to Z
 746     __ delayed()->restore();

 747 
 748     __ BIND(miss);
 749     __ addcc(G0,1,Rret);        // set NZ flags, NZ result
 750 








 751     __ ret();                   // Result in Rret is != 0; flags set to NZ
 752     __ delayed()->restore();

 753 
 754     return start;
 755   }
 756 
 757 
 758   // Called from MacroAssembler::verify_oop
 759   //
 760   address generate_verify_oop_subroutine() {
 761     StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
 762 
 763     address start = __ pc();
 764 
 765     __ verify_oop_subroutine();
 766 
 767     return start;
 768   }
 769 
 770 
 771   //
 772   // Verify that a register contains a clean 32-bit positive value
 773   // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
 774   //
 775   //  Input:
 776   //    Rint  -  32-bits value
 777   //    Rtmp  -  scratch
 778   //
 779   void assert_clean_int(Register Rint, Register Rtmp) {





 780   }
 781 
 782   //
 783   //  Generate overlap test for array copy stubs
 784   //
 785   //  Input:
 786   //    O0    -  array1
 787   //    O1    -  array2
 788   //    O2    -  element count
 789   //
 790   //  Kills temps:  O3, O4
 791   //
 792   void array_overlap_test(address no_overlap_target, int log2_elem_size) {
 793     assert(no_overlap_target != NULL, "must be generated");
 794     array_overlap_test(no_overlap_target, NULL, log2_elem_size);
 795   }
 796   void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
 797     array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
 798   }
 799   void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {


1196 
1197     if (entry != NULL) {
1198       *entry = __ pc();
1199       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1200       BLOCK_COMMENT("Entry:");
1201     }
1202 
1203     // for short arrays, just do single element copy
1204     __ cmp(count, 23); // 16 + 7
1205     __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1206     __ delayed()->mov(G0, offset);
1207 
1208     if (aligned) {
1209       // 'aligned' == true when it is known statically during compilation
1210       // of this arraycopy call site that both 'from' and 'to' addresses
1211       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1212       //
1213       // Aligned arrays have 4-byte alignment in the 32-bit VM
1214       // and 8-byte alignment in the 64-bit VM, so we do this only in the 32-bit VM.
1215       //











1216     } else {
1217       // copy bytes to align 'to' on 8 byte boundary
1218       __ andcc(to, 7, G1); // misaligned bytes
1219       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1220       __ delayed()->neg(G1);
1221       __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
1222       __ sub(count, G1, count);
1223     __ BIND(L_align);
1224       __ ldub(from, 0, O3);
1225       __ deccc(G1);
1226       __ inc(from);
1227       __ stb(O3, to, 0);
1228       __ br(Assembler::notZero, false, Assembler::pt, L_align);
1229       __ delayed()->inc(to);
1230     __ BIND(L_skip_alignment);
1231     }

1232     if (!aligned)

1233     {
1234       // Copy with shift 16 bytes per iteration if arrays do not have
1235       // the same alignment mod 8, otherwise fall through to the next
1236       // code for aligned copy.
1237       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1238       // Also jump over aligned copy after the copy with shift completed.
1239 
1240       copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1241     }
1242 
1243     // Both arrays are 8-byte aligned; copy 16 bytes at a time
1244       __ and3(count, 7, G4); // Save count
1245       __ srl(count, 3, count);
1246      generate_disjoint_long_copy_core(aligned);
1247       __ mov(G4, count);     // Restore count
1248 
1249     // copy trailing bytes
1250     __ BIND(L_copy_byte);
1251       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1252       __ align(OptoLoopAlignment);


1309     __ delayed()->add(from, count, end_from);
1310 
1311     {
1312       // Align the ends of the arrays since they may not be aligned even
1313       // when the arrays themselves are aligned.
1314 
1315       // copy bytes to align 'end_to' on 8 byte boundary
1316       __ andcc(end_to, 7, G1); // misaligned bytes
1317       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1318       __ delayed()->nop();
1319       __ sub(count, G1, count);
1320     __ BIND(L_align);
1321       __ dec(end_from);
1322       __ dec(end_to);
1323       __ ldub(end_from, 0, O3);
1324       __ deccc(G1);
1325       __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1326       __ delayed()->stb(O3, end_to, 0);
1327     __ BIND(L_skip_alignment);
1328     }

1329     if (aligned) {
1330       // Both arrays are aligned to 8-bytes in 64-bits VM.
1331       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1332       // in unaligned case.
1333       __ dec(count, 16);
1334     } else

1335     {
1336       // Copy with shift 16 bytes per iteration if arrays do not have
1337       // the same alignment mod 8, otherwise jump to the next
1338       // code for aligned copy (and subtracting 16 from 'count' before jump).
1339       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1340       // Also jump over aligned copy after the copy with shift completed.
1341 
1342       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1343                                         L_aligned_copy, L_copy_byte);
1344     }
1345     // copy 4 elements (16 bytes) at a time
1346       __ align(OptoLoopAlignment);
1347     __ BIND(L_aligned_copy);
1348       __ dec(end_from, 16);
1349       __ ldx(end_from, 8, O3);
1350       __ ldx(end_from, 0, O4);
1351       __ dec(end_to, 16);
1352       __ deccc(count, 16);
1353       __ stx(O3, end_to, 8);
1354       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);


1402 
1403     if (entry != NULL) {
1404       *entry = __ pc();
1405       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1406       BLOCK_COMMENT("Entry:");
1407     }
1408 
1409     // for short arrays, just do single element copy
1410     __ cmp(count, 11); // 8 + 3  (22 bytes)
1411     __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1412     __ delayed()->mov(G0, offset);
1413 
1414     if (aligned) {
1415       // 'aligned' == true when it is known statically during compilation
1416       // of this arraycopy call site that both 'from' and 'to' addresses
1417       // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1418       //
1419       // Aligned arrays have 4 bytes alignment in 32-bits VM
1420       // and 8 bytes - in 64-bits VM.
1421       //











1422     } else {
1423       // copy 1 element if necessary to align 'to' on a 4-byte boundary
1424       __ andcc(to, 3, G0);
1425       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1426       __ delayed()->lduh(from, 0, O3);
1427       __ inc(from, 2);
1428       __ inc(to, 2);
1429       __ dec(count);
1430       __ sth(O3, to, -2);
1431     __ BIND(L_skip_alignment);
1432 
1433       // copy 2 elements to align 'to' on an 8 byte boundary
1434       __ andcc(to, 7, G0);
1435       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1436       __ delayed()->lduh(from, 0, O3);
1437       __ dec(count, 2);
1438       __ lduh(from, 2, O4);
1439       __ inc(from, 4);
1440       __ inc(to, 4);
1441       __ sth(O3, to, -4);
1442       __ sth(O4, to, -2);
1443     __ BIND(L_skip_alignment2);
1444     }

1445     if (!aligned)

1446     {
1447       // Copy with shift 16 bytes per iteration if arrays do not have
1448       // the same alignment mod 8, otherwise fall through to the next
1449       // code for aligned copy.
1450       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1451       // Also jump over aligned copy after the copy with shift completed.
1452 
1453       copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1454     }
1455 
1456     // Both arrays are 8-byte aligned; copy 16 bytes at a time
1457       __ and3(count, 3, G4); // Save
1458       __ srl(count, 2, count);
1459      generate_disjoint_long_copy_core(aligned);
1460       __ mov(G4, count); // restore
1461 
1462     // copy 1 element at a time
1463     __ BIND(L_copy_2_bytes);
1464       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1465       __ align(OptoLoopAlignment);


1542       // align source address at 4 bytes address boundary
1543       if (t == T_BYTE) {
1544         // One-byte misalignment happens only for byte arrays
1545         __ andcc(to, 1, G0);
1546         __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1547         __ delayed()->nop();
1548         __ stb(value, to, 0);
1549         __ inc(to, 1);
1550         __ dec(count, 1);
1551         __ BIND(L_skip_align1);
1552       }
1553       // Two-byte misalignment happens only for byte and short (char) arrays
1554       __ andcc(to, 2, G0);
1555       __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1556       __ delayed()->nop();
1557       __ sth(value, to, 0);
1558       __ inc(to, 2);
1559       __ dec(count, 1 << (shift - 1));
1560       __ BIND(L_skip_align2);
1561     }

1562     if (!aligned) {

1563     // align to 8 bytes, we know we are 4 byte aligned to start
1564     __ andcc(to, 7, G0);
1565     __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1566     __ delayed()->nop();
1567     __ stw(value, to, 0);
1568     __ inc(to, 4);
1569     __ dec(count, 1 << shift);
1570     __ BIND(L_fill_32_bytes);

1571     }

1572 
1573     if (t == T_INT) {
1574       // Zero extend value
1575       __ srl(value, 0, value);
1576     }
1577     if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1578       __ sllx(value, 32, O3);
1579       __ or3(value, O3, value);
1580     }
1581 
1582     Label L_check_fill_8_bytes;
1583     // Fill 32-byte chunks
1584     __ subcc(count, 8 << shift, count);
1585     __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1586     __ delayed()->nop();
1587 
1588     Label L_fill_32_bytes_loop, L_fill_4_bytes;
1589     __ align(16);
1590     __ BIND(L_fill_32_bytes_loop);
1591 


1752       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1753       __ delayed()->lduh(end_from, -2, O3);
1754       __ dec(end_from, 2);
1755       __ dec(end_to, 2);
1756       __ dec(count);
1757       __ sth(O3, end_to, 0);
1758     __ BIND(L_skip_alignment);
1759 
1760       // copy 2 elements to align 'end_to' on an 8 byte boundary
1761       __ andcc(end_to, 7, G0);
1762       __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1763       __ delayed()->lduh(end_from, -2, O3);
1764       __ dec(count, 2);
1765       __ lduh(end_from, -4, O4);
1766       __ dec(end_from, 4);
1767       __ dec(end_to, 4);
1768       __ sth(O3, end_to, 2);
1769       __ sth(O4, end_to, 0);
1770     __ BIND(L_skip_alignment2);
1771     }

1772     if (aligned) {
1773       // Both arrays are aligned to 8-bytes in 64-bits VM.
1774       // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1775       // in unaligned case.
1776       __ dec(count, 8);
1777     } else

1778     {
1779       // Copy with shift 16 bytes per iteration if arrays do not have
1780       // the same alignment mod 8, otherwise jump to the next
1781       // code for aligned copy (and subtracting 8 from 'count' before jump).
1782       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1783       // Also jump over aligned copy after the copy with shift completed.
1784 
1785       copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1786                                         L_aligned_copy, L_copy_2_bytes);
1787     }
1788     // copy 4 elements (16 bytes) at a time
1789       __ align(OptoLoopAlignment);
1790     __ BIND(L_aligned_copy);
1791       __ dec(end_from, 16);
1792       __ ldx(end_from, 8, O3);
1793       __ ldx(end_from, 0, O4);
1794       __ dec(end_to, 16);
1795       __ deccc(count, 8);
1796       __ stx(O3, end_to, 8);
1797       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);


1867   //      count: O2 treated as signed
1868   //
1869   void generate_disjoint_int_copy_core(bool aligned) {
1870 
1871     Label L_skip_alignment, L_aligned_copy;
1872     Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1873 
1874     const Register from      = O0;   // source array address
1875     const Register to        = O1;   // destination array address
1876     const Register count     = O2;   // elements count
1877     const Register offset    = O5;   // offset from start of arrays
1878     // O3, O4, G3, G4 are used as temp registers
1879 
1880     // 'aligned' == true when it is known statically during compilation
1881     // of this arraycopy call site that both 'from' and 'to' addresses
1882     // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1883     //
1884     // Aligned arrays have 4 bytes alignment in 32-bits VM
1885     // and 8 bytes - in 64-bits VM.
1886     //

1887     if (!aligned)

1888     {
1889       // The next check could be put under 'ifndef' since the code in
1890       // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1891 
1892       // for short arrays, just do single element copy
1893       __ cmp(count, 5); // 4 + 1 (20 bytes)
1894       __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1895       __ delayed()->mov(G0, offset);
1896 
1897       // copy 1 element to align 'to' on an 8 byte boundary
1898       __ andcc(to, 7, G0);
1899       __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1900       __ delayed()->ld(from, 0, O3);
1901       __ inc(from, 4);
1902       __ inc(to, 4);
1903       __ dec(count);
1904       __ st(O3, to, -4);
1905     __ BIND(L_skip_alignment);
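          // 'to' is now 8-byte aligned: int elements are at least 4-byte
          // aligned, so at most one element had to be copied above.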
1906 
1907     // if arrays have same alignment mod 8, do 4 elements copy


2354     const Register from  = O0;  // source array address
2355     const Register to    = O1;  // destination array address
2356     const Register count = O2;  // elements count
2357 
2358     __ align(CodeEntryAlignment);
2359     StubCodeMark mark(this, "StubRoutines", name);
2360     address start = __ pc();
2361 
2362     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
2363 
2364     if (entry != NULL) {
2365       *entry = __ pc();
2366       // caller can pass a 64-bit byte count here
2367       BLOCK_COMMENT("Entry:");
2368     }
2369 
2370     // save arguments for barrier generation
2371     __ mov(to, G1);
2372     __ mov(count, G5);
2373     gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
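        // The copy cores below are free to clobber the O registers, which is
        // why 'to' and 'count' were saved in G1/G5 above; both are needed
        // again for the post barrier.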

2374     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
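        // With compressed oops each element is a 4-byte narrow oop, so the
        // int copy core can be reused; otherwise oops are 8 bytes and the
        // long copy core is used.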
2375     if (UseCompressedOops) {
2376       generate_disjoint_int_copy_core(aligned);
2377     } else {
2378       generate_disjoint_long_copy_core(aligned);
2379     }



2380     // O0 is used as temp register
2381     gen_write_ref_array_post_barrier(G1, G5, O0);
2382 
2383     // O3, O4 are used as temp registers
2384     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2385     __ retl();
2386     __ delayed()->mov(G0, O0); // return 0
2387     return start;
2388   }
2389 
2390   //  Generate stub for conjoint oop copy.  If "aligned" is true, the
2391   //  "from" and "to" addresses are assumed to be heapword aligned.
2392   //
2393   // Arguments for generated stub:
2394   //      from:  O0
2395   //      to:    O1
2396   //      count: O2 treated as signed
2397   //
2398   address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2399                                      address *entry, const char *name,


2405 
2406     __ align(CodeEntryAlignment);
2407     StubCodeMark mark(this, "StubRoutines", name);
2408     address start = __ pc();
2409 
2410     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
2411 
2412     if (entry != NULL) {
2413       *entry = __ pc();
2414       // caller can pass a 64-bit byte count here
2415       BLOCK_COMMENT("Entry:");
2416     }
2417 
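        // If the regions do not overlap in a way that requires a backward
        // copy, dispatch to the disjoint stub at 'nooverlap_target';
        // otherwise fall through to the conjoint (backward) copy below.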
2418     array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2419 
2420     // save arguments for barrier generation
2421     __ mov(to, G1);
2422     __ mov(count, G5);
2423     gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2424 

2425     if (UseCompressedOops) {
2426       generate_conjoint_int_copy_core(aligned);
2427     } else {
2428       generate_conjoint_long_copy_core(aligned);
2429     }



2430 
2431     // O0 is used as temp register
2432     gen_write_ref_array_post_barrier(G1, G5, O0);
2433 
2434     // O3, O4 are used as temp registers
2435     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2436     __ retl();
2437     __ delayed()->mov(G0, O0); // return 0
2438     return start;
2439   }
2440 
2441 
2442   // Helper for generating a dynamic type check.
2443   // Smashes only the given temp registers.
2444   void generate_type_check(Register sub_klass,
2445                            Register super_check_offset,
2446                            Register super_klass,
2447                            Register temp,
2448                            Label& L_success) {
2449     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);


3021                                                                                   "arrayof_jbyte_arraycopy");
3022 
3023     //*** jshort
3024     // Always need aligned and unaligned versions
3025     StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
3026                                                                                     "jshort_disjoint_arraycopy");
3027     StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
3028                                                                                     &entry_jshort_arraycopy,
3029                                                                                     "jshort_arraycopy");
3030     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3031                                                                                     "arrayof_jshort_disjoint_arraycopy");
3032     StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
3033                                                                                     "arrayof_jshort_arraycopy");
3034 
3035     //*** jint
3036     // Aligned versions
3037     StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3038                                                                                 "arrayof_jint_disjoint_arraycopy");
3039     StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3040                                                                                 "arrayof_jint_arraycopy");

3041     // In the 64-bit VM we need both aligned and unaligned versions of jint arraycopy.
3042     // entry_jint_arraycopy always points to the unaligned version (note that we overwrite it).
3043     StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
3044                                                                                 "jint_disjoint_arraycopy");
3045     StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
3046                                                                                 &entry_jint_arraycopy,
3047                                                                                 "jint_arraycopy");








3048 
3049     //*** jlong
3050     // It is always aligned
3051     StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3052                                                                                   "arrayof_jlong_disjoint_arraycopy");
3053     StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3054                                                                                   "arrayof_jlong_arraycopy");
3055     StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3056     StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
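      // jlong elements are always 8-byte aligned in Java arrays, so the plain
      // entries can simply alias the aligned 'arrayof_' stubs above.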
3057 
3058 
3059     //*** oops
3060     // Aligned versions
3061     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, &entry,
3062                                                                                       "arrayof_oop_disjoint_arraycopy");
3063     StubRoutines::_arrayof_oop_arraycopy                 = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3064                                                                                       "arrayof_oop_arraycopy");
3065     // Aligned versions without pre-barriers
3066     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3067                                                                                       "arrayof_oop_disjoint_arraycopy_uninit",
3068                                                                                       /*dest_uninitialized*/true);
3069     StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
3070                                                                                       "arrayof_oop_arraycopy_uninit",
3071                                                                                       /*dest_uninitialized*/true);
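      // The _uninit variants are generated with dest_uninitialized == true:
      // the destination is known to be freshly allocated, so the pre barrier
      // does not need to record the previous destination values.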

3072     if (UseCompressedOops) {
3073       // With compressed oops we need unaligned versions; note that we overwrite entry_oop_arraycopy.
3074       StubRoutines::_oop_disjoint_arraycopy            = generate_disjoint_oop_copy(false, &entry,
3075                                                                                     "oop_disjoint_arraycopy");
3076       StubRoutines::_oop_arraycopy                     = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3077                                                                                     "oop_arraycopy");
3078       // Unaligned versions without pre-barriers
3079       StubRoutines::_oop_disjoint_arraycopy_uninit     = generate_disjoint_oop_copy(false, &entry,
3080                                                                                     "oop_disjoint_arraycopy_uninit",
3081                                                                                     /*dest_uninitialized*/true);
3082       StubRoutines::_oop_arraycopy_uninit              = generate_conjoint_oop_copy(false, entry, NULL,
3083                                                                                     "oop_arraycopy_uninit",
3084                                                                                     /*dest_uninitialized*/true);
3085     } else

3086     {
3087       // oop arraycopy is always aligned on 32-bit, and on 64-bit without compressed oops
3088       StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3089       StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
3090       StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3091       StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
3092     }
3093 
3094     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3095     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3096                                                                         /*dest_uninitialized*/true);
3097 
3098     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
3099                                                               entry_jbyte_arraycopy,
3100                                                               entry_jshort_arraycopy,
3101                                                               entry_jint_arraycopy,
3102                                                               entry_jlong_arraycopy);
3103     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
3104                                                                entry_jbyte_arraycopy,
3105                                                                entry_jshort_arraycopy,


4976 
4977   void generate_initial() {
4978     // Generates all stubs and initializes the entry points
4979 
4980     //------------------------------------------------------------------------------------------------------------------------
4981     // entry points that exist in all platforms
4982     // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
4983     //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
4984     StubRoutines::_forward_exception_entry                 = generate_forward_exception();
4985 
4986     StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
4987     StubRoutines::_catch_exception_entry                   = generate_catch_exception();
4988 
4989     //------------------------------------------------------------------------------------------------------------------------
4990     // entry points that are platform specific
4991     StubRoutines::Sparc::_test_stop_entry                  = generate_test_stop();
4992 
4993     StubRoutines::Sparc::_stop_subroutine_entry            = generate_stop_subroutine();
4994     StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
4995 











4996     // Build this early so it's available for the interpreter.
4997     StubRoutines::_throw_StackOverflowError_entry =
4998             generate_throw_exception("StackOverflowError throw_exception",
4999             CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5000     StubRoutines::_throw_delayed_StackOverflowError_entry =
5001             generate_throw_exception("delayed StackOverflowError throw_exception",
5002             CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5003 
5004     if (UseCRC32Intrinsics) {
5005       // set the table address before generating the stub which uses it
5006       StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5007       StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5008     }
5009 
5010     if (UseCRC32CIntrinsics) {
5011       // set the table address before generating the stub which uses it
5012       StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5013       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5014     }
5015   }


5083     _stub_count = !all ? 0x100 : 0x200;
5084     if (all) {
5085       generate_all();
5086     } else {
5087       generate_initial();
5088     }
5089 
5090     // make sure this stub is available for all local calls
5091     if (_atomic_add_stub.is_unbound()) {
5092       // generate a second time, if necessary
5093       (void) generate_atomic_add();
5094     }
5095   }
5096 
5097 
5098  private:
5099   int _stub_count;
5100   void stub_prolog(StubCodeDesc* cdesc) {
5101     # ifdef ASSERT
5102       // put extra information in the stub code, to make it more readable

5103       // Write the high part of the address
5104       // [RGV] Check if there is a dependency on the size of this prolog
5105       __ emit_data((intptr_t)cdesc >> 32,    relocInfo::none);

5106       __ emit_data((intptr_t)cdesc,    relocInfo::none);
5107       __ emit_data(++_stub_count, relocInfo::none);
5108     # endif
5109     align(true);
5110   }
5111 
5112   void align(bool at_header = false) {
5113     // %%%%% move this constant somewhere else
5114     // UltraSPARC cache line size is 8 instructions (32 bytes):
5115     const unsigned int icache_line_size = 32;
5116     const unsigned int icache_half_line_size = 16;
5117 
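        // At a stub header, pad with zero data words up to a full icache
        // line; elsewhere, pad with nops up to a half line so the padding is
        // harmless if it gets executed.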
5118     if (at_header) {
5119       while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5120         __ emit_data(0, relocInfo::none);
5121       }
5122     } else {
5123       while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5124         __ nop();
5125       }