1 /*
2 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
199 __ br(Assembler::greater, false, Assembler::pt, loop);
200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
201
202 // done
203 __ BIND(exit);
204 }
205
206 // setup parameters, method & call Java function
207 #ifdef ASSERT
208 // layout_activation_impl checks its notion of saved SP against
209 // this register, so if this changes, update it as well.
210 const Register saved_SP = Lscratch;
211 __ mov(SP, saved_SP); // keep track of SP before call
212 #endif
213
214 // setup parameters
215 const Register t = G3_scratch;
216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
218 __ sub(FP, t, Gargs); // setup parameter pointer
219 #ifdef _LP64
220 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
221 #endif
222 __ mov(SP, O5_savedSP);
223
224
225 // do the call
226 //
227 // the following registers must be set up:
228 //
229 // G2_thread
230 // G5_method
231 // Gargs
232 BLOCK_COMMENT("call Java function");
233 __ jmpl(entry_point.as_in().as_register(), G0, O7);
234 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
235
236 BLOCK_COMMENT("call_stub_return_address:");
237 return_pc = __ pc();
238
239 // The callee, if it wasn't interpreted, can return with SP changed, so
240 // we can no longer assert on the change of SP.
241
254 // store int result
255 __ st(O0, addr, G0);
256
257 __ BIND(exit);
258 __ ret();
259 __ delayed()->restore();
260
261 __ BIND(is_object);
262 __ ba(exit);
263 __ delayed()->st_ptr(O0, addr, G0);
264
265 __ BIND(is_float);
266 __ ba(exit);
267 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
268
269 __ BIND(is_double);
270 __ ba(exit);
271 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
272
273 __ BIND(is_long);
274 #ifdef _LP64
275 __ ba(exit);
276 __ delayed()->st_long(O0, addr, G0); // store entire long
277 #else
278 #if defined(COMPILER2)
279 // All return values are where we want them, except for Longs. C2 returns
280 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
281 // Since the interpreter will return longs in both G1 and O0/O1 in the 32-bit
282 // build, we simply always use G1.
283 // Note: I tried to make C2 return longs in O0/O1 and G1 so we wouldn't have to
284 // do this here. Unfortunately, if we did a rethrow we'd see a MachEpilog node
285 // first, which would move G1 -> O0/O1 and destroy the exception we were throwing.
286
287 __ ba(exit);
288 __ delayed()->stx(G1, addr, G0); // store entire long
289 #else
290 __ st(O1, addr, BytesPerInt);
291 __ ba(exit);
292 __ delayed()->st(O0, addr, G0);
293 #endif /* COMPILER2 */
294 #endif /* _LP64 */
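// To summarize the is_long variants above: the LP64 build stores the whole long from O0,
// the 32-bit COMPILER2 build stores it from G1 (for the reason given in the note above),
// and the remaining 32-bit build stores the two 32-bit halves from O1 and O0.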
295 }
296 return start;
297 }
298
299
300 //----------------------------------------------------------------------------------------------------
301 // Return point for a Java call if there's an exception thrown in Java code.
302 // The exception is caught and transformed into a pending exception stored in
303 // JavaThread that can be tested from within the VM.
304 //
305 // Oexception: exception oop
306
307 address generate_catch_exception() {
308 StubCodeMark mark(this, "StubRoutines", "catch_exception");
309
310 address start = __ pc();
311 // verify that thread corresponds
312 __ verify_thread();
313
314 const Register& temp_reg = Gtemp;
729
730 return start;
731 }
732 Label _atomic_add_stub; // called from other stubs
733
734
735 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
736 // Arguments :
737 //
738 // ret : O0, returned
739 // icc/xcc: set as O0 (depending on wordSize)
740 // sub : O1, argument, not changed
741 // super: O2, argument, not changed
742 // raddr: O7, blown by call
743 address generate_partial_subtype_check() {
744 __ align(CodeEntryAlignment);
745 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
746 address start = __ pc();
747 Label miss;
748
749 #if defined(COMPILER2) && !defined(_LP64)
750 // Do not use a 'save' because it blows the 64-bit O registers.
751 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
752 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
753 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
754 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
755 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
756 Register Rret = O0;
757 Register Rsub = O1;
758 Register Rsuper = O2;
759 #else
760 __ save_frame(0);
761 Register Rret = I0;
762 Register Rsub = I1;
763 Register Rsuper = I2;
764 #endif
765
766 Register L0_ary_len = L0;
767 Register L1_ary_ptr = L1;
768 Register L2_super = L2;
769 Register L3_index = L3;
770
771 __ check_klass_subtype_slow_path(Rsub, Rsuper,
772 L0, L1, L2, L3,
773 NULL, &miss);
774
775 // Match falls through here.
776 __ addcc(G0,0,Rret); // set Z flags, Z result
777
778 #if defined(COMPILER2) && !defined(_LP64)
779 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
780 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
781 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
782 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
783 __ retl(); // Result in Rret is zero; flags set to Z
784 __ delayed()->add(SP,4*wordSize,SP);
785 #else
786 __ ret(); // Result in Rret is zero; flags set to Z
787 __ delayed()->restore();
788 #endif
789
790 __ BIND(miss);
791 __ addcc(G0,1,Rret); // set NZ flags, NZ result
792
793 #if defined(COMPILER2) && !defined(_LP64)
794 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
795 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
796 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
797 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
798 __ retl(); // Result in Rret is != 0; flags set to NZ
799 __ delayed()->add(SP,4*wordSize,SP);
800 #else
801 __ ret(); // Result in Rret is != 0; flags set to NZ
802 __ delayed()->restore();
803 #endif
804
805 return start;
806 }
807
808
809 // Called from MacroAssembler::verify_oop
810 //
811 address generate_verify_oop_subroutine() {
812 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
813
814 address start = __ pc();
815
816 __ verify_oop_subroutine();
817
818 return start;
819 }
820
821
822 //
823 // Verify that a register contains a clean 32-bit positive value
824 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
825 //
826 // Input:
827 // Rint - 32-bits value
828 // Rtmp - scratch
829 //
830 void assert_clean_int(Register Rint, Register Rtmp) {
831 #if defined(ASSERT) && defined(_LP64)
832 __ signx(Rint, Rtmp);
833 __ cmp(Rint, Rtmp);
834 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
835 #endif
836 }
837
838 //
839 // Generate overlap test for array copy stubs
840 //
841 // Input:
842 // O0 - array1
843 // O1 - array2
844 // O2 - element count
845 //
846 // Kills temps: O3, O4
847 //
848 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
849 assert(no_overlap_target != NULL, "must be generated");
850 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
851 }
852 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
853 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
854 }
855 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1252
1253 if (entry != NULL) {
1254 *entry = __ pc();
1255 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1256 BLOCK_COMMENT("Entry:");
1257 }
1258
1259 // for short arrays, just do single element copy
1260 __ cmp(count, 23); // 16 + 7
1261 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1262 __ delayed()->mov(G0, offset);
1263
1264 if (aligned) {
1265 // 'aligned' == true when it is known statically during compilation
1266 // of this arraycopy call site that both 'from' and 'to' addresses
1267 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1268 //
1269 // Aligned arrays have 4-byte alignment in the 32-bit VM
1270 // and 8-byte alignment in the 64-bit VM, so we do this only for the 32-bit VM
1271 //
1272 #ifndef _LP64
1273 // copy a 4-byte word if necessary to align 'to' to 8 bytes
1274 __ andcc(to, 7, G0);
1275 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
1276 __ delayed()->ld(from, 0, O3);
1277 __ inc(from, 4);
1278 __ inc(to, 4);
1279 __ dec(count, 4);
1280 __ st(O3, to, -4);
1281 __ BIND(L_skip_alignment);
1282 #endif
1283 } else {
1284 // copy bytes to align 'to' on 8 byte boundary
1285 __ andcc(to, 7, G1); // misaligned bytes
1286 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1287 __ delayed()->neg(G1);
1288 __ inc(G1, 8); // bytes needed to copy to reach the next 8-byte alignment
1289 __ sub(count, G1, count);
1290 __ BIND(L_align);
1291 __ ldub(from, 0, O3);
1292 __ deccc(G1);
1293 __ inc(from);
1294 __ stb(O3, to, 0);
1295 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1296 __ delayed()->inc(to);
1297 __ BIND(L_skip_alignment);
1298 }
1299 #ifdef _LP64
1300 if (!aligned)
1301 #endif
1302 {
1303 // Copy with shift 16 bytes per iteration if arrays do not have
1304 // the same alignment mod 8, otherwise fall through to the next
1305 // code for aligned copy.
1306 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1307 // Also jump over the aligned copy after the copy with shift has completed.
1308
1309 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1310 }
1311
1312 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1313 __ and3(count, 7, G4); // Save count
1314 __ srl(count, 3, count);
1315 generate_disjoint_long_copy_core(aligned);
1316 __ mov(G4, count); // Restore count
1317
1318 // copy trailing bytes
1319 __ BIND(L_copy_byte);
1320 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1321 __ align(OptoLoopAlignment);
1378 __ delayed()->add(from, count, end_from);
1379
1380 {
1381 // Align the ends of the arrays since they may not be aligned even
1382 // when the arrays themselves are aligned.
1383
1384 // copy bytes to align 'end_to' on 8 byte boundary
1385 __ andcc(end_to, 7, G1); // misaligned bytes
1386 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1387 __ delayed()->nop();
1388 __ sub(count, G1, count);
1389 __ BIND(L_align);
1390 __ dec(end_from);
1391 __ dec(end_to);
1392 __ ldub(end_from, 0, O3);
1393 __ deccc(G1);
1394 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1395 __ delayed()->stb(O3, end_to, 0);
1396 __ BIND(L_skip_alignment);
1397 }
1398 #ifdef _LP64
1399 if (aligned) {
1400 // Both arrays are aligned to 8-bytes in 64-bits VM.
1401 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1402 // in unaligned case.
1403 __ dec(count, 16);
1404 } else
1405 #endif
1406 {
1407 // Copy with shift 16 bytes per iteration if arrays do not have
1408 // the same alignment mod 8, otherwise jump to the next
1409 // code for aligned copy (subtracting 16 from 'count' before the jump).
1410 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1411 // Also jump over the aligned copy after the copy with shift has completed.
1412
1413 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1414 L_aligned_copy, L_copy_byte);
1415 }
1416 // copy 4 elements (16 bytes) at a time
1417 __ align(OptoLoopAlignment);
1418 __ BIND(L_aligned_copy);
1419 __ dec(end_from, 16);
1420 __ ldx(end_from, 8, O3);
1421 __ ldx(end_from, 0, O4);
1422 __ dec(end_to, 16);
1423 __ deccc(count, 16);
1424 __ stx(O3, end_to, 8);
1425 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1473
1474 if (entry != NULL) {
1475 *entry = __ pc();
1476 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1477 BLOCK_COMMENT("Entry:");
1478 }
1479
1480 // for short arrays, just do single element copy
1481 __ cmp(count, 11); // 8 + 3 (22 bytes)
1482 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1483 __ delayed()->mov(G0, offset);
1484
1485 if (aligned) {
1486 // 'aligned' == true when it is known statically during compilation
1487 // of this arraycopy call site that both 'from' and 'to' addresses
1488 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1489 //
1490 // Aligned arrays have 4-byte alignment in the 32-bit VM
1491 // and 8-byte alignment in the 64-bit VM.
1492 //
1493 #ifndef _LP64
1494 // copy a 2-element word if necessary to align 'to' to 8 bytes
1495 __ andcc(to, 7, G0);
1496 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1497 __ delayed()->ld(from, 0, O3);
1498 __ inc(from, 4);
1499 __ inc(to, 4);
1500 __ dec(count, 2);
1501 __ st(O3, to, -4);
1502 __ BIND(L_skip_alignment);
1503 #endif
1504 } else {
1505 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1506 __ andcc(to, 3, G0);
1507 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1508 __ delayed()->lduh(from, 0, O3);
1509 __ inc(from, 2);
1510 __ inc(to, 2);
1511 __ dec(count);
1512 __ sth(O3, to, -2);
1513 __ BIND(L_skip_alignment);
1514
1515 // copy 2 elements to align 'to' on an 8 byte boundary
1516 __ andcc(to, 7, G0);
1517 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1518 __ delayed()->lduh(from, 0, O3);
1519 __ dec(count, 2);
1520 __ lduh(from, 2, O4);
1521 __ inc(from, 4);
1522 __ inc(to, 4);
1523 __ sth(O3, to, -4);
1524 __ sth(O4, to, -2);
1525 __ BIND(L_skip_alignment2);
1526 }
1527 #ifdef _LP64
1528 if (!aligned)
1529 #endif
1530 {
1531 // Copy with shift 16 bytes per iteration if arrays do not have
1532 // the same alignment mod 8, otherwise fall through to the next
1533 // code for aligned copy.
1534 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1535 // Also jump over the aligned copy after the copy with shift has completed.
1536
1537 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1538 }
1539
1540 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1541 __ and3(count, 3, G4); // Save
1542 __ srl(count, 2, count);
1543 generate_disjoint_long_copy_core(aligned);
1544 __ mov(G4, count); // restore
1545
1546 // copy 1 element at a time
1547 __ BIND(L_copy_2_bytes);
1548 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1549 __ align(OptoLoopAlignment);
1626 // align source address at 4 bytes address boundary
1627 if (t == T_BYTE) {
1628 // One byte misalignment happens only for byte arrays
1629 __ andcc(to, 1, G0);
1630 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1631 __ delayed()->nop();
1632 __ stb(value, to, 0);
1633 __ inc(to, 1);
1634 __ dec(count, 1);
1635 __ BIND(L_skip_align1);
1636 }
1637 // Two bytes misalignment happens only for byte and short (char) arrays
1638 __ andcc(to, 2, G0);
1639 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1640 __ delayed()->nop();
1641 __ sth(value, to, 0);
1642 __ inc(to, 2);
1643 __ dec(count, 1 << (shift - 1));
1644 __ BIND(L_skip_align2);
1645 }
1646 #ifdef _LP64
1647 if (!aligned) {
1648 #endif
1649 // align to 8 bytes, we know we are 4 byte aligned to start
1650 __ andcc(to, 7, G0);
1651 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1652 __ delayed()->nop();
1653 __ stw(value, to, 0);
1654 __ inc(to, 4);
1655 __ dec(count, 1 << shift);
1656 __ BIND(L_fill_32_bytes);
1657 #ifdef _LP64
1658 }
1659 #endif
1660
1661 if (t == T_INT) {
1662 // Zero extend value
1663 __ srl(value, 0, value);
1664 }
1665 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1666 __ sllx(value, 32, O3);
1667 __ or3(value, O3, value);
1668 }
1669
1670 Label L_check_fill_8_bytes;
1671 // Fill 32-byte chunks
1672 __ subcc(count, 8 << shift, count);
1673 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1674 __ delayed()->nop();
1675
1676 Label L_fill_32_bytes_loop, L_fill_4_bytes;
1677 __ align(16);
1678 __ BIND(L_fill_32_bytes_loop);
1679
1840 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1841 __ delayed()->lduh(end_from, -2, O3);
1842 __ dec(end_from, 2);
1843 __ dec(end_to, 2);
1844 __ dec(count);
1845 __ sth(O3, end_to, 0);
1846 __ BIND(L_skip_alignment);
1847
1848 // copy 2 elements to align 'end_to' on an 8 byte boundary
1849 __ andcc(end_to, 7, G0);
1850 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1851 __ delayed()->lduh(end_from, -2, O3);
1852 __ dec(count, 2);
1853 __ lduh(end_from, -4, O4);
1854 __ dec(end_from, 4);
1855 __ dec(end_to, 4);
1856 __ sth(O3, end_to, 2);
1857 __ sth(O4, end_to, 0);
1858 __ BIND(L_skip_alignment2);
1859 }
1860 #ifdef _LP64
1861 if (aligned) {
1862 // Both arrays are aligned to 8-bytes in 64-bits VM.
1863 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1864 // in unaligned case.
1865 __ dec(count, 8);
1866 } else
1867 #endif
1868 {
1869 // Copy with shift 16 bytes per iteration if arrays do not have
1870 // the same alignment mod 8, otherwise jump to the next
1871 // code for aligned copy (subtracting 8 from 'count' before the jump).
1872 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1873 // Also jump over the aligned copy after the copy with shift has completed.
1874
1875 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1876 L_aligned_copy, L_copy_2_bytes);
1877 }
1878 // copy 4 elements (16 bytes) at a time
1879 __ align(OptoLoopAlignment);
1880 __ BIND(L_aligned_copy);
1881 __ dec(end_from, 16);
1882 __ ldx(end_from, 8, O3);
1883 __ ldx(end_from, 0, O4);
1884 __ dec(end_to, 16);
1885 __ deccc(count, 8);
1886 __ stx(O3, end_to, 8);
1887 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1957 // count: O2 treated as signed
1958 //
1959 void generate_disjoint_int_copy_core(bool aligned) {
1960
1961 Label L_skip_alignment, L_aligned_copy;
1962 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1963
1964 const Register from = O0; // source array address
1965 const Register to = O1; // destination array address
1966 const Register count = O2; // elements count
1967 const Register offset = O5; // offset from start of arrays
1968 // O3, O4, G3, G4 are used as temp registers
1969
1970 // 'aligned' == true when it is known statically during compilation
1971 // of this arraycopy call site that both 'from' and 'to' addresses
1972 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1973 //
1974 // Aligned arrays have 4-byte alignment in the 32-bit VM
1975 // and 8-byte alignment in the 64-bit VM.
1976 //
1977 #ifdef _LP64
1978 if (!aligned)
1979 #endif
1980 {
1981 // The next check could be put under 'ifndef' since the code in
1982 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1983
1984 // for short arrays, just do single element copy
1985 __ cmp(count, 5); // 4 + 1 (20 bytes)
1986 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1987 __ delayed()->mov(G0, offset);
1988
1989 // copy 1 element to align 'to' on an 8 byte boundary
1990 __ andcc(to, 7, G0);
1991 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1992 __ delayed()->ld(from, 0, O3);
1993 __ inc(from, 4);
1994 __ inc(to, 4);
1995 __ dec(count);
1996 __ st(O3, to, -4);
1997 __ BIND(L_skip_alignment);
1998
1999 // if arrays have the same alignment mod 8, do a 4-element copy
2446 const Register from = O0; // source array address
2447 const Register to = O1; // destination array address
2448 const Register count = O2; // elements count
2449
2450 __ align(CodeEntryAlignment);
2451 StubCodeMark mark(this, "StubRoutines", name);
2452 address start = __ pc();
2453
2454 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2455
2456 if (entry != NULL) {
2457 *entry = __ pc();
2458 // caller can pass a 64-bit byte count here
2459 BLOCK_COMMENT("Entry:");
2460 }
2461
2462 // save arguments for barrier generation
2463 __ mov(to, G1);
2464 __ mov(count, G5);
2465 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2466 #ifdef _LP64
2467 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2468 if (UseCompressedOops) {
2469 generate_disjoint_int_copy_core(aligned);
2470 } else {
2471 generate_disjoint_long_copy_core(aligned);
2472 }
2473 #else
2474 generate_disjoint_int_copy_core(aligned);
2475 #endif
2476 // O0 is used as temp register
2477 gen_write_ref_array_post_barrier(G1, G5, O0);
2478
2479 // O3, O4 are used as temp registers
2480 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2481 __ retl();
2482 __ delayed()->mov(G0, O0); // return 0
2483 return start;
2484 }
2485
2486 // Generate stub for conjoint oop copy. If "aligned" is true, the
2487 // "from" and "to" addresses are assumed to be heapword aligned.
2488 //
2489 // Arguments for generated stub:
2490 // from: O0
2491 // to: O1
2492 // count: O2 treated as signed
2493 //
2494 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2495 address *entry, const char *name,
2501
2502 __ align(CodeEntryAlignment);
2503 StubCodeMark mark(this, "StubRoutines", name);
2504 address start = __ pc();
2505
2506 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2507
2508 if (entry != NULL) {
2509 *entry = __ pc();
2510 // caller can pass a 64-bit byte count here
2511 BLOCK_COMMENT("Entry:");
2512 }
2513
2514 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2515
2516 // save arguments for barrier generation
2517 __ mov(to, G1);
2518 __ mov(count, G5);
2519 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2520
2521 #ifdef _LP64
2522 if (UseCompressedOops) {
2523 generate_conjoint_int_copy_core(aligned);
2524 } else {
2525 generate_conjoint_long_copy_core(aligned);
2526 }
2527 #else
2528 generate_conjoint_int_copy_core(aligned);
2529 #endif
2530
2531 // O0 is used as temp register
2532 gen_write_ref_array_post_barrier(G1, G5, O0);
2533
2534 // O3, O4 are used as temp registers
2535 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2536 __ retl();
2537 __ delayed()->mov(G0, O0); // return 0
2538 return start;
2539 }
2540
2541
2542 // Helper for generating a dynamic type check.
2543 // Smashes only the given temp registers.
2544 void generate_type_check(Register sub_klass,
2545 Register super_check_offset,
2546 Register super_klass,
2547 Register temp,
2548 Label& L_success) {
2549 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
3121 "arrayof_jbyte_arraycopy");
3122
3123 //*** jshort
3124 // Always need aligned and unaligned versions
3125 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3126 "jshort_disjoint_arraycopy");
3127 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
3128 &entry_jshort_arraycopy,
3129 "jshort_arraycopy");
3130 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3131 "arrayof_jshort_disjoint_arraycopy");
3132 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3133 "arrayof_jshort_arraycopy");
3134
3135 //*** jint
3136 // Aligned versions
3137 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3138 "arrayof_jint_disjoint_arraycopy");
3139 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3140 "arrayof_jint_arraycopy");
3141 #ifdef _LP64
3142 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3143 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3144 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3145 "jint_disjoint_arraycopy");
3146 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
3147 &entry_jint_arraycopy,
3148 "jint_arraycopy");
3149 #else
3150 // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
3151 // (in fact in 32bit we always have a pre-loop part even in the aligned version,
3152 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
3153 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
3154 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy;
3155 #endif
3156
3157
3158 //*** jlong
3159 // It is always aligned
3160 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3161 "arrayof_jlong_disjoint_arraycopy");
3162 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3163 "arrayof_jlong_arraycopy");
3164 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3165 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
3166
3167
3168 //*** oops
3169 // Aligned versions
3170 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
3171 "arrayof_oop_disjoint_arraycopy");
3172 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3173 "arrayof_oop_arraycopy");
3174 // Aligned versions without pre-barriers
3175 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3176 "arrayof_oop_disjoint_arraycopy_uninit",
3177 /*dest_uninitialized*/true);
3178 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
3179 "arrayof_oop_arraycopy_uninit",
3180 /*dest_uninitialized*/true);
3181 #ifdef _LP64
3182 if (UseCompressedOops) {
3183 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
3184 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3185 "oop_disjoint_arraycopy");
3186 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3187 "oop_arraycopy");
3188 // Unaligned versions without pre-barriers
3189 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
3190 "oop_disjoint_arraycopy_uninit",
3191 /*dest_uninitialized*/true);
3192 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
3193 "oop_arraycopy_uninit",
3194 /*dest_uninitialized*/true);
3195 } else
3196 #endif
3197 {
3198 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3199 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3200 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
3201 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3202 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
3203 }
3204
3205 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3206 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3207 /*dest_uninitialized*/true);
3208
3209 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3210 entry_jbyte_arraycopy,
3211 entry_jshort_arraycopy,
3212 entry_jint_arraycopy,
3213 entry_jlong_arraycopy);
3214 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3215 entry_jbyte_arraycopy,
3216 entry_jshort_arraycopy,
5087
5088 void generate_initial() {
5089 // Generates all stubs and initializes the entry points
5090
5091 //------------------------------------------------------------------------------------------------------------------------
5092 // entry points that exist in all platforms
5093 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
5094 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
5095 StubRoutines::_forward_exception_entry = generate_forward_exception();
5096
5097 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
5098 StubRoutines::_catch_exception_entry = generate_catch_exception();
5099
5100 //------------------------------------------------------------------------------------------------------------------------
5101 // entry points that are platform specific
5102 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
5103
5104 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
5105 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
5106
5107 #if !defined(COMPILER2) && !defined(_LP64)
5108 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
5109 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
5110 StubRoutines::_atomic_add_entry = generate_atomic_add();
5111 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
5112 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
5113 StubRoutines::_atomic_cmpxchg_byte_entry = ShouldNotCallThisStub();
5114 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
5115 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
5116 #endif // COMPILER2 !=> _LP64
5117
5118 // Build this early so it's available for the interpreter.
5119 StubRoutines::_throw_StackOverflowError_entry =
5120 generate_throw_exception("StackOverflowError throw_exception",
5121 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5122 StubRoutines::_throw_delayed_StackOverflowError_entry =
5123 generate_throw_exception("delayed StackOverflowError throw_exception",
5124 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5125
5126 if (UseCRC32Intrinsics) {
5127 // set table address before stub generation, which uses it
5128 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5129 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5130 }
5131
5132 if (UseCRC32CIntrinsics) {
5133 // set table address before stub generation, which uses it
5134 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5135 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5136 }
5137 }
5205 _stub_count = !all ? 0x100 : 0x200;
5206 if (all) {
5207 generate_all();
5208 } else {
5209 generate_initial();
5210 }
5211
5212 // make sure this stub is available for all local calls
5213 if (_atomic_add_stub.is_unbound()) {
5214 // generate a second time, if necessary
5215 (void) generate_atomic_add();
5216 }
5217 }
5218
5219
5220 private:
5221 int _stub_count;
5222 void stub_prolog(StubCodeDesc* cdesc) {
5223 # ifdef ASSERT
5224 // put extra information in the stub code, to make it more readable
5225 #ifdef _LP64
5226 // Write the high part of the address
5227 // [RGV] Check if there is a dependency on the size of this prolog
5228 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
5229 #endif
5230 __ emit_data((intptr_t)cdesc, relocInfo::none);
5231 __ emit_data(++_stub_count, relocInfo::none);
5232 # endif
5233 align(true);
5234 }
5235
5236 void align(bool at_header = false) {
5237 // %%%%% move this constant somewhere else
5238 // UltraSPARC cache line size is 8 instructions:
5239 const unsigned int icache_line_size = 32;
5240 const unsigned int icache_half_line_size = 16;
5241
5242 if (at_header) {
5243 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5244 __ emit_data(0, relocInfo::none);
5245 }
5246 } else {
5247 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5248 __ nop();
5249 }
|
1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
199 __ br(Assembler::greater, false, Assembler::pt, loop);
200 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
201
202 // done
203 __ BIND(exit);
204 }
205
206 // setup parameters, method & call Java function
207 #ifdef ASSERT
208 // layout_activation_impl checks its notion of saved SP against
209 // this register, so if this changes, update it as well.
210 const Register saved_SP = Lscratch;
211 __ mov(SP, saved_SP); // keep track of SP before call
212 #endif
213
214 // setup parameters
215 const Register t = G3_scratch;
216 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
217 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
218 __ sub(FP, t, Gargs); // setup parameter pointer
219 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
220 __ mov(SP, O5_savedSP);
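// Worked example (hypothetical values): with a parameter_size of 3 words and an 8-byte
// stack element (Interpreter::logStackElementSize == 3), t becomes 3 << 3 == 24 bytes,
// so Gargs = FP - 24 + STACK_BIAS, i.e. the outgoing parameter area sits just below FP.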
221
222
223 // do the call
224 //
225 // the following registers must be set up:
226 //
227 // G2_thread
228 // G5_method
229 // Gargs
230 BLOCK_COMMENT("call Java function");
231 __ jmpl(entry_point.as_in().as_register(), G0, O7);
232 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
233
234 BLOCK_COMMENT("call_stub_return_address:");
235 return_pc = __ pc();
236
237 // The callee, if it wasn't interpreted, can return with SP changed, so
238 // we can no longer assert on the change of SP.
239
252 // store int result
253 __ st(O0, addr, G0);
254
255 __ BIND(exit);
256 __ ret();
257 __ delayed()->restore();
258
259 __ BIND(is_object);
260 __ ba(exit);
261 __ delayed()->st_ptr(O0, addr, G0);
262
263 __ BIND(is_float);
264 __ ba(exit);
265 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
266
267 __ BIND(is_double);
268 __ ba(exit);
269 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
270
271 __ BIND(is_long);
272 __ ba(exit);
273 __ delayed()->st_long(O0, addr, G0); // store entire long
274 }
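// Note on the result handling above: each non-int result type branches back to 'exit'
// with its store folded into the branch delay slot, while an int result is written by
// the plain store just before the 'exit' label.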
275 return start;
276 }
277
278
279 //----------------------------------------------------------------------------------------------------
280 // Return point for a Java call if there's an exception thrown in Java code.
281 // The exception is caught and transformed into a pending exception stored in
282 // JavaThread that can be tested from within the VM.
283 //
284 // Oexception: exception oop
285
286 address generate_catch_exception() {
287 StubCodeMark mark(this, "StubRoutines", "catch_exception");
288
289 address start = __ pc();
290 // verify that thread corresponds
291 __ verify_thread();
292
293 const Register& temp_reg = Gtemp;
708
709 return start;
710 }
711 Label _atomic_add_stub; // called from other stubs
712
713
714 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
715 // Arguments :
716 //
717 // ret : O0, returned
718 // icc/xcc: set as O0 (depending on wordSize)
719 // sub : O1, argument, not changed
720 // super: O2, argument, not changed
721 // raddr: O7, blown by call
722 address generate_partial_subtype_check() {
723 __ align(CodeEntryAlignment);
724 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
725 address start = __ pc();
726 Label miss;
727
728 __ save_frame(0);
729 Register Rret = I0;
730 Register Rsub = I1;
731 Register Rsuper = I2;
732
733 Register L0_ary_len = L0;
734 Register L1_ary_ptr = L1;
735 Register L2_super = L2;
736 Register L3_index = L3;
737
738 __ check_klass_subtype_slow_path(Rsub, Rsuper,
739 L0, L1, L2, L3,
740 NULL, &miss);
741
742 // Match falls through here.
743 __ addcc(G0,0,Rret); // set Z flags, Z result
744
745 __ ret(); // Result in Rret is zero; flags set to Z
746 __ delayed()->restore();
747
748 __ BIND(miss);
749 __ addcc(G0,1,Rret); // set NZ flags, NZ result
750
751 __ ret(); // Result in Rret is != 0; flags set to NZ
752 __ delayed()->restore();
753
754 return start;
755 }
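// A caller of this stub can test the outcome either from the condition codes set above
// or by comparing O0 with zero: a zero result (Z) means the partial subtype check hit,
// a non-zero result (NZ) means a miss.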
756
757
758 // Called from MacroAssembler::verify_oop
759 //
760 address generate_verify_oop_subroutine() {
761 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
762
763 address start = __ pc();
764
765 __ verify_oop_subroutine();
766
767 return start;
768 }
769
770
771 //
772 // Verify that a register contains a clean 32-bit positive value
773 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
774 //
775 // Input:
776 // Rint - 32-bits value
777 // Rtmp - scratch
778 //
779 void assert_clean_int(Register Rint, Register Rtmp) {
780 #if defined(ASSERT)
781 __ signx(Rint, Rtmp);
782 __ cmp(Rint, Rtmp);
783 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
784 #endif
785 }
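// Example: a register holding 0x0000000000000005 passes (sign-extending its low 32 bits
// reproduces the value), while 0x0000000100000005 would take the breakpoint trap because
// its upper 32 bits are not clean.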
786
787 //
788 // Generate overlap test for array copy stubs
789 //
790 // Input:
791 // O0 - array1
792 // O1 - array2
793 // O2 - element count
794 //
795 // Kills temps: O3, O4
796 //
797 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
798 assert(no_overlap_target != NULL, "must be generated");
799 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
800 }
801 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
802 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
803 }
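// Both wrappers above forward to the three-argument worker below, which branches to the
// no-overlap code (given either as an external address or as a local Label, typically
// the disjoint copy path) when a forward copy is safe.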
804 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1201
1202 if (entry != NULL) {
1203 *entry = __ pc();
1204 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1205 BLOCK_COMMENT("Entry:");
1206 }
1207
1208 // for short arrays, just do single element copy
1209 __ cmp(count, 23); // 16 + 7
1210 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1211 __ delayed()->mov(G0, offset);
1212
1213 if (aligned) {
1214 // 'aligned' == true when it is known statically during compilation
1215 // of this arraycopy call site that both 'from' and 'to' addresses
1216 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1217 //
1218 // Aligned arrays have 4-byte alignment in the 32-bit VM
1219 // and 8-byte alignment in the 64-bit VM, so we do this only for the 32-bit VM
1220 //
1221 } else {
1222 // copy bytes to align 'to' on 8 byte boundary
1223 __ andcc(to, 7, G1); // misaligned bytes
1224 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1225 __ delayed()->neg(G1);
1226 __ inc(G1, 8); // bytes needed to copy to reach the next 8-byte alignment
1227 __ sub(count, G1, count);
1228 __ BIND(L_align);
1229 __ ldub(from, 0, O3);
1230 __ deccc(G1);
1231 __ inc(from);
1232 __ stb(O3, to, 0);
1233 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1234 __ delayed()->inc(to);
1235 __ BIND(L_skip_alignment);
1236 }
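// Example: if 'to' ends in ...5, (to & 7) == 5, so G1 == 3 and the loop above copies
// three single bytes before the bulk copy resumes on an 8-byte boundary.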
1237 if (!aligned)
1238 {
1239 // Copy with shift 16 bytes per iteration if arrays do not have
1240 // the same alignment mod 8, otherwise fall through to the next
1241 // code for aligned copy.
1242 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1243 // Also jump over the aligned copy after the copy with shift has completed.
1244
1245 copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
1246 }
1247
1248 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1249 __ and3(count, 7, G4); // Save count
1250 __ srl(count, 3, count);
1251 generate_disjoint_long_copy_core(aligned);
1252 __ mov(G4, count); // Restore count
1253
1254 // copy trailing bytes
1255 __ BIND(L_copy_byte);
1256 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1257 __ align(OptoLoopAlignment);
1314 __ delayed()->add(from, count, end_from);
1315
1316 {
1317 // Align the ends of the arrays since they may not be aligned even
1318 // when the arrays themselves are aligned.
1319
1320 // copy bytes to align 'end_to' on 8 byte boundary
1321 __ andcc(end_to, 7, G1); // misaligned bytes
1322 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1323 __ delayed()->nop();
1324 __ sub(count, G1, count);
1325 __ BIND(L_align);
1326 __ dec(end_from);
1327 __ dec(end_to);
1328 __ ldub(end_from, 0, O3);
1329 __ deccc(G1);
1330 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1331 __ delayed()->stb(O3, end_to, 0);
1332 __ BIND(L_skip_alignment);
1333 }
1334 if (aligned) {
1335 // Both arrays are aligned to 8-bytes in 64-bits VM.
1336 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1337 // in unaligned case.
1338 __ dec(count, 16);
1339 } else
1340 {
1341 // Copy with shift 16 bytes per iteration if arrays do not have
1342 // the same alignment mod 8, otherwise jump to the next
1343 // code for aligned copy (subtracting 16 from 'count' before the jump).
1344 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1345 // Also jump over the aligned copy after the copy with shift has completed.
1346
1347 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1348 L_aligned_copy, L_copy_byte);
1349 }
1350 // copy 4 elements (16 bytes) at a time
1351 __ align(OptoLoopAlignment);
1352 __ BIND(L_aligned_copy);
1353 __ dec(end_from, 16);
1354 __ ldx(end_from, 8, O3);
1355 __ ldx(end_from, 0, O4);
1356 __ dec(end_to, 16);
1357 __ deccc(count, 16);
1358 __ stx(O3, end_to, 8);
1359 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1407
1408 if (entry != NULL) {
1409 *entry = __ pc();
1410 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1411 BLOCK_COMMENT("Entry:");
1412 }
1413
1414 // for short arrays, just do single element copy
1415 __ cmp(count, 11); // 8 + 3 (22 bytes)
1416 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1417 __ delayed()->mov(G0, offset);
1418
1419 if (aligned) {
1420 // 'aligned' == true when it is known statically during compilation
1421 // of this arraycopy call site that both 'from' and 'to' addresses
1422 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1423 //
1424 // Aligned arrays have 4-byte alignment in the 32-bit VM
1425 // and 8-byte alignment in the 64-bit VM.
1426 //
1427 } else {
1428 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1429 __ andcc(to, 3, G0);
1430 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1431 __ delayed()->lduh(from, 0, O3);
1432 __ inc(from, 2);
1433 __ inc(to, 2);
1434 __ dec(count);
1435 __ sth(O3, to, -2);
1436 __ BIND(L_skip_alignment);
1437
1438 // copy 2 elements to align 'to' on an 8 byte boundary
1439 __ andcc(to, 7, G0);
1440 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1441 __ delayed()->lduh(from, 0, O3);
1442 __ dec(count, 2);
1443 __ lduh(from, 2, O4);
1444 __ inc(from, 4);
1445 __ inc(to, 4);
1446 __ sth(O3, to, -4);
1447 __ sth(O4, to, -2);
1448 __ BIND(L_skip_alignment2);
1449 }
1450 if (!aligned)
1451 {
1452 // Copy with shift 16 bytes per iteration if arrays do not have
1453 // the same alignment mod 8, otherwise fall through to the next
1454 // code for aligned copy.
1455 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1456 // Also jump over the aligned copy after the copy with shift has completed.
1457
1458 copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
1459 }
1460
1461 // Both arrays are 8-byte aligned; copy 16 bytes at a time
1462 __ and3(count, 3, G4); // Save
1463 __ srl(count, 2, count);
1464 generate_disjoint_long_copy_core(aligned);
1465 __ mov(G4, count); // restore
1466
1467 // copy 1 element at a time
1468 __ BIND(L_copy_2_bytes);
1469 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1470 __ align(OptoLoopAlignment);
1547 // align source address at 4 bytes address boundary
1548 if (t == T_BYTE) {
1549 // One byte misalignment happens only for byte arrays
1550 __ andcc(to, 1, G0);
1551 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1552 __ delayed()->nop();
1553 __ stb(value, to, 0);
1554 __ inc(to, 1);
1555 __ dec(count, 1);
1556 __ BIND(L_skip_align1);
1557 }
1558 // Two bytes misalignment happens only for byte and short (char) arrays
1559 __ andcc(to, 2, G0);
1560 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1561 __ delayed()->nop();
1562 __ sth(value, to, 0);
1563 __ inc(to, 2);
1564 __ dec(count, 1 << (shift - 1));
1565 __ BIND(L_skip_align2);
1566 }
1567 if (!aligned) {
1568 // align to 8 bytes, we know we are 4 byte aligned to start
1569 __ andcc(to, 7, G0);
1570 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1571 __ delayed()->nop();
1572 __ stw(value, to, 0);
1573 __ inc(to, 4);
1574 __ dec(count, 1 << shift);
1575 __ BIND(L_fill_32_bytes);
1576 }
1577
1578 if (t == T_INT) {
1579 // Zero extend value
1580 __ srl(value, 0, value);
1581 }
1582 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1583 __ sllx(value, 32, O3);
1584 __ or3(value, O3, value);
1585 }
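// Example: a 32-bit fill value of 0xCAFEBABE becomes 0xCAFEBABECAFEBABE here, so each
// 64-bit store below writes two int-sized copies of the pattern.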
1586
1587 Label L_check_fill_8_bytes;
1588 // Fill 32-byte chunks
1589 __ subcc(count, 8 << shift, count);
1590 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1591 __ delayed()->nop();
1592
1593 Label L_fill_32_bytes_loop, L_fill_4_bytes;
1594 __ align(16);
1595 __ BIND(L_fill_32_bytes_loop);
1596
1757 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1758 __ delayed()->lduh(end_from, -2, O3);
1759 __ dec(end_from, 2);
1760 __ dec(end_to, 2);
1761 __ dec(count);
1762 __ sth(O3, end_to, 0);
1763 __ BIND(L_skip_alignment);
1764
1765 // copy 2 elements to align 'end_to' on an 8 byte boundary
1766 __ andcc(end_to, 7, G0);
1767 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1768 __ delayed()->lduh(end_from, -2, O3);
1769 __ dec(count, 2);
1770 __ lduh(end_from, -4, O4);
1771 __ dec(end_from, 4);
1772 __ dec(end_to, 4);
1773 __ sth(O3, end_to, 2);
1774 __ sth(O4, end_to, 0);
1775 __ BIND(L_skip_alignment2);
1776 }
1777 if (aligned) {
1778 // Both arrays are aligned to 8-bytes in 64-bits VM.
1779 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1780 // in unaligned case.
1781 __ dec(count, 8);
1782 } else
1783 {
1784 // Copy with shift 16 bytes per iteration if arrays do not have
1785 // the same alignment mod 8, otherwise jump to the next
1786 // code for aligned copy (subtracting 8 from 'count' before the jump).
1787 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1788 // Also jump over the aligned copy after the copy with shift has completed.
1789
1790 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1791 L_aligned_copy, L_copy_2_bytes);
1792 }
1793 // copy 4 elements (16 bytes) at a time
1794 __ align(OptoLoopAlignment);
1795 __ BIND(L_aligned_copy);
1796 __ dec(end_from, 16);
1797 __ ldx(end_from, 8, O3);
1798 __ ldx(end_from, 0, O4);
1799 __ dec(end_to, 16);
1800 __ deccc(count, 8);
1801 __ stx(O3, end_to, 8);
1802 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1872 // count: O2 treated as signed
1873 //
1874 void generate_disjoint_int_copy_core(bool aligned) {
1875
1876 Label L_skip_alignment, L_aligned_copy;
1877 Label L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1878
1879 const Register from = O0; // source array address
1880 const Register to = O1; // destination array address
1881 const Register count = O2; // elements count
1882 const Register offset = O5; // offset from start of arrays
1883 // O3, O4, G3, G4 are used as temp registers
1884
1885 // 'aligned' == true when it is known statically during compilation
1886 // of this arraycopy call site that both 'from' and 'to' addresses
1887 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1888 //
1889 // Aligned arrays have 4-byte alignment in the 32-bit VM
1890 // and 8-byte alignment in the 64-bit VM.
1891 //
1892 if (!aligned)
1893 {
1894 // The next check could be put under 'ifndef' since the code in
1895 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1896
1897 // for short arrays, just do single element copy
1898 __ cmp(count, 5); // 4 + 1 (20 bytes)
1899 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1900 __ delayed()->mov(G0, offset);
1901
1902 // copy 1 element to align 'to' on an 8 byte boundary
1903 __ andcc(to, 7, G0);
1904 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1905 __ delayed()->ld(from, 0, O3);
1906 __ inc(from, 4);
1907 __ inc(to, 4);
1908 __ dec(count);
1909 __ st(O3, to, -4);
1910 __ BIND(L_skip_alignment);
1911
1912 // if arrays have the same alignment mod 8, do a 4-element copy
2359 const Register from = O0; // source array address
2360 const Register to = O1; // destination array address
2361 const Register count = O2; // elements count
2362
2363 __ align(CodeEntryAlignment);
2364 StubCodeMark mark(this, "StubRoutines", name);
2365 address start = __ pc();
2366
2367 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2368
2369 if (entry != NULL) {
2370 *entry = __ pc();
2371 // caller can pass a 64-bit byte count here
2372 BLOCK_COMMENT("Entry:");
2373 }
2374
2375 // save arguments for barrier generation
2376 __ mov(to, G1);
2377 __ mov(count, G5);
2378 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2379 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2380 if (UseCompressedOops) {
2381 generate_disjoint_int_copy_core(aligned);
2382 } else {
2383 generate_disjoint_long_copy_core(aligned);
2384 }
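// With compressed oops each element is a 32-bit narrowOop, so the int copy core is
// reused; with uncompressed oops the elements are 64 bits wide and the long copy core
// is used instead.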
2385 // O0 is used as temp register
2386 gen_write_ref_array_post_barrier(G1, G5, O0);
2387
2388 // O3, O4 are used as temp registers
2389 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2390 __ retl();
2391 __ delayed()->mov(G0, O0); // return 0
2392 return start;
2393 }
2394
2395 // Generate stub for conjoint oop copy. If "aligned" is true, the
2396 // "from" and "to" addresses are assumed to be heapword aligned.
2397 //
2398 // Arguments for generated stub:
2399 // from: O0
2400 // to: O1
2401 // count: O2 treated as signed
2402 //
2403 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2404 address *entry, const char *name,
2410
2411 __ align(CodeEntryAlignment);
2412 StubCodeMark mark(this, "StubRoutines", name);
2413 address start = __ pc();
2414
2415 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2416
2417 if (entry != NULL) {
2418 *entry = __ pc();
2419 // caller can pass a 64-bit byte count here
2420 BLOCK_COMMENT("Entry:");
2421 }
2422
2423 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2424
2425 // save arguments for barrier generation
2426 __ mov(to, G1);
2427 __ mov(count, G5);
2428 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2429
2430 if (UseCompressedOops) {
2431 generate_conjoint_int_copy_core(aligned);
2432 } else {
2433 generate_conjoint_long_copy_core(aligned);
2434 }
2435
2436 // O0 is used as temp register
2437 gen_write_ref_array_post_barrier(G1, G5, O0);
2438
2439 // O3, O4 are used as temp registers
2440 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2441 __ retl();
2442 __ delayed()->mov(G0, O0); // return 0
2443 return start;
2444 }
2445
2446
2447 // Helper for generating a dynamic type check.
2448 // Smashes only the given temp registers.
2449 void generate_type_check(Register sub_klass,
2450 Register super_check_offset,
2451 Register super_klass,
2452 Register temp,
2453 Label& L_success) {
2454 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
3026 "arrayof_jbyte_arraycopy");
3027
3028 //*** jshort
3029 // Always need aligned and unaligned versions
3030 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3031 "jshort_disjoint_arraycopy");
3032 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
3033 &entry_jshort_arraycopy,
3034 "jshort_arraycopy");
3035 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3036 "arrayof_jshort_disjoint_arraycopy");
3037 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3038 "arrayof_jshort_arraycopy");
3039
3040 //*** jint
3041 // Aligned versions
3042 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3043 "arrayof_jint_disjoint_arraycopy");
3044 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3045 "arrayof_jint_arraycopy");
3046 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3047 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3048 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3049 "jint_disjoint_arraycopy");
3050 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
3051 &entry_jint_arraycopy,
3052 "jint_arraycopy");
3053
3054 //*** jlong
3055 // It is always aligned
3056 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3057 "arrayof_jlong_disjoint_arraycopy");
3058 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3059 "arrayof_jlong_arraycopy");
3060 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3061 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
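// jlong elements are always 8-byte aligned, so the "unaligned" entry points simply
// reuse the arrayof_ (aligned) stubs generated just above.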
3062
3063
3064 //*** oops
3065 // Aligned versions
3066 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
3067 "arrayof_oop_disjoint_arraycopy");
3068 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3069 "arrayof_oop_arraycopy");
3070 // Aligned versions without pre-barriers
3071 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3072 "arrayof_oop_disjoint_arraycopy_uninit",
3073 /*dest_uninitialized*/true);
3074 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
3075 "arrayof_oop_arraycopy_uninit",
3076 /*dest_uninitialized*/true);
3077 if (UseCompressedOops) {
3078 // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
3079 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3080 "oop_disjoint_arraycopy");
3081 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3082 "oop_arraycopy");
3083 // Unaligned versions without pre-barriers
3084 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
3085 "oop_disjoint_arraycopy_uninit",
3086 /*dest_uninitialized*/true);
3087 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
3088 "oop_arraycopy_uninit",
3089 /*dest_uninitialized*/true);
3090 } else
3091 {
3092 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3093 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3094 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
3095 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3096 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
3097 }
3098
3099 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3100 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3101 /*dest_uninitialized*/true);
3102
3103 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3104 entry_jbyte_arraycopy,
3105 entry_jshort_arraycopy,
3106 entry_jint_arraycopy,
3107 entry_jlong_arraycopy);
3108 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3109 entry_jbyte_arraycopy,
3110 entry_jshort_arraycopy,
4981
4982 void generate_initial() {
4983 // Generates all stubs and initializes the entry points
4984
4985 //------------------------------------------------------------------------------------------------------------------------
4986 // entry points that exist in all platforms
4987 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
4988 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
4989 StubRoutines::_forward_exception_entry = generate_forward_exception();
4990
4991 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
4992 StubRoutines::_catch_exception_entry = generate_catch_exception();
4993
4994 //------------------------------------------------------------------------------------------------------------------------
4995 // entry points that are platform specific
4996 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
4997
4998 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
4999 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
5000
5001 // Build this early so it's available for the interpreter.
5002 StubRoutines::_throw_StackOverflowError_entry =
5003 generate_throw_exception("StackOverflowError throw_exception",
5004 CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
5005 StubRoutines::_throw_delayed_StackOverflowError_entry =
5006 generate_throw_exception("delayed StackOverflowError throw_exception",
5007 CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError));
5008
5009 if (UseCRC32Intrinsics) {
5010 // set table address before stub generation, which uses it
5011 StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
5012 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5013 }
5014
5015 if (UseCRC32CIntrinsics) {
5016 // set table address before stub generation, which uses it
5017 StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
5018 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
5019 }
5020 }
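// generate_initial() builds only the stubs needed early during VM startup (the call
// stub, exception handling, and the CRC tables/stubs above); the remaining stubs are
// produced later by generate_all(), selected via the 'all' flag below.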
5088 _stub_count = !all ? 0x100 : 0x200;
5089 if (all) {
5090 generate_all();
5091 } else {
5092 generate_initial();
5093 }
5094
5095 // make sure this stub is available for all local calls
5096 if (_atomic_add_stub.is_unbound()) {
5097 // generate a second time, if necessary
5098 (void) generate_atomic_add();
5099 }
5100 }
5101
5102
5103 private:
5104 int _stub_count;
5105 void stub_prolog(StubCodeDesc* cdesc) {
5106 # ifdef ASSERT
5107 // put extra information in the stub code, to make it more readable
5108 // Write the high part of the address
5109 // [RGV] Check if there is a dependency on the size of this prolog
5110 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
5111 __ emit_data((intptr_t)cdesc, relocInfo::none);
5112 __ emit_data(++_stub_count, relocInfo::none);
5113 # endif
5114 align(true);
5115 }
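// In debug builds the prolog above embeds the StubCodeDesc address (as two 32-bit
// halves) and a running stub count ahead of each stub, which helps identify stubs when
// examining the generated code.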
5116
5117 void align(bool at_header = false) {
5118 // %%%%% move this constant somewhere else
5119 // UltraSPARC cache line size is 8 instructions:
5120 const unsigned int icache_line_size = 32;
5121 const unsigned int icache_half_line_size = 16;
5122
5123 if (at_header) {
5124 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
5125 __ emit_data(0, relocInfo::none);
5126 }
5127 } else {
5128 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
5129 __ nop();
5130 }
|