src/cpu/sparc/vm/stubGenerator_sparc.cpp




 133     //       the code in frame::entry_frame_call_wrapper()
 134 
 135     const Argument link           = Argument(0, false); // used only for GC
 136     const Argument result         = Argument(1, false);
 137     const Argument result_type    = Argument(2, false);
 138     const Argument method         = Argument(3, false);
 139     const Argument entry_point    = Argument(4, false);
 140     const Argument parameters     = Argument(5, false);
 141     const Argument parameter_size = Argument(6, false);
 142     const Argument thread         = Argument(7, false);
 143 
 144     // setup thread register
 145     __ ld_ptr(thread.as_address(), G2_thread);
 146     __ reinit_heapbase();
 147 
 148 #ifdef ASSERT
 149     // make sure we have no pending exceptions
 150     { const Register t = G3_scratch;
 151       Label L;
 152       __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
 153       __ br_null(t, false, Assembler::pt, L);
 154       __ stop("StubRoutines::call_stub: entered with pending exception");
 155       __ bind(L);
 156     }
 157 #endif
 158 
 159     // create activation frame & allocate space for parameters
 160     { const Register t = G3_scratch;
 161       __ ld_ptr(parameter_size.as_address(), t);                // get parameter size (in words)
 162       __ add(t, frame::memory_parameter_word_sp_offset, t);     // add space for save area (in words)
 163       __ round_to(t, WordsPerLong);                             // make sure it is multiple of 2 (in words)
 164       __ sll(t, Interpreter::logStackElementSize, t);           // compute number of bytes
 165       __ neg(t);                                                // negate so it can be used with save
 166       __ save(SP, t, SP);                                       // setup new frame
 167     }
 168 
 169     // +---------------+ <--- sp + 0
 170     // |               |
 171     // . reg save area .
 172     // |               |
 173     // +---------------+ <--- sp + 0x40


 189     // . extra 7 slots .
 190     // |               |
 191     // +---------------+ <--- fp + 0x5c
 192     // |  param. size  |
 193     // +---------------+ <--- fp + 0x60
 194     // |    thread     |
 195     // +---------------+
 196     // |               |
 197 
 198     // pass parameters if any
 199     BLOCK_COMMENT("pass parameters if any");
 200     { const Register src = parameters.as_in().as_register();
 201       const Register dst = Lentry_args;
 202       const Register tmp = G3_scratch;
 203       const Register cnt = G4_scratch;
 204 
 205       // test if any parameters & setup of Lentry_args
 206       Label exit;
 207       __ ld_ptr(parameter_size.as_in().as_address(), cnt);      // parameter counter
 208       __ add( FP, STACK_BIAS, dst );
 209       __ tst(cnt);
 210       __ br(Assembler::zero, false, Assembler::pn, exit);
 211       __ delayed()->sub(dst, BytesPerWord, dst);                 // setup Lentry_args
 212 
 213       // copy parameters if any
 214       Label loop;
 215       __ BIND(loop);
 216       // Store parameter value
 217       __ ld_ptr(src, 0, tmp);
 218       __ add(src, BytesPerWord, src);
 219       __ st_ptr(tmp, dst, 0);
 220       __ deccc(cnt);
 221       __ br(Assembler::greater, false, Assembler::pt, loop);
 222       __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
 223 
 224       // done
 225       __ BIND(exit);
 226     }
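
The block above copies the incoming Java arguments: src walks the caller-supplied parameter array upward one word at a time, while dst (Lentry_args) walks the new frame's argument area downward by one stack element per iteration. A minimal C++ sketch of that addressing pattern; copy_args_downward and its parameters are hypothetical names, not HotSpot code:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical sketch: copy 'count' word-sized arguments from a flat C array
    // into slots that grow downward from the top of a frame, mirroring the loop
    // above (src advances by one word, dst retreats by one stack element).
    void copy_args_downward(intptr_t* dst_top, const intptr_t* src, size_t count) {
      intptr_t* dst = dst_top - 1;      // first slot just below the frame top
      for (size_t i = 0; i < count; i++) {
        *dst = src[i];
        dst--;                          // next slot is one stack element lower
      }
    }
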
 227 
 228     // setup parameters, method & call Java function
 229 #ifdef ASSERT
 230     // layout_activation_impl checks its notion of saved SP against


 264     // store result depending on type
 265     // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
 266     //  is treated as T_INT)
 267     { const Register addr = result     .as_in().as_register();
 268       const Register type = result_type.as_in().as_register();
 269       Label is_long, is_float, is_double, is_object, exit;
 270       __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
 271       __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
 272       __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
 273       __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
 274       __ delayed()->nop();
 275 
 276       // store int result
 277       __ st(O0, addr, G0);
 278 
 279       __ BIND(exit);
 280       __ ret();
 281       __ delayed()->restore();
 282 
 283       __ BIND(is_object);
 284       __ ba(exit, false);
 285       __ delayed()->st_ptr(O0, addr, G0);
 286 
 287       __ BIND(is_float);
 288       __ ba(exit, false);
 289       __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
 290 
 291       __ BIND(is_double);
 292       __ ba(exit, false);
 293       __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
 294 
 295       __ BIND(is_long);
 296 #ifdef _LP64
 297       __ ba(exit, false);
 298       __ delayed()->st_long(O0, addr, G0);      // store entire long
 299 #else
 300 #if defined(COMPILER2)
 301   // All return values are where we want them, except for Longs.  C2 returns
 302   // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
 303   // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
 304   // build we simply always use G1.
 305   // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
 306   // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
 307   // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
 308 
 309       __ ba(exit, false);
 310       __ delayed()->stx(G1, addr, G0);  // store entire long
 311 #else
 312       __ st(O1, addr, BytesPerInt);
 313       __ ba(exit, false);
 314       __ delayed()->st(O0, addr, G0);
 315 #endif /* COMPILER2 */
 316 #endif /* _LP64 */
 317      }
 318      return start;
 319   }
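
The result-storing block above dispatches on the declared result type, and anything that is not T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is stored as a 32-bit int. A plain C++ sketch of the same dispatch; BasicTypeSketch and store_result are illustrations, not the stub or HotSpot's BasicType:

    #include <cstdint>

    enum BasicTypeSketch { BT_INT, BT_LONG, BT_FLOAT, BT_DOUBLE, BT_OBJECT };

    // Hypothetical sketch of the dispatch above: write the Java result into the
    // caller-supplied result buffer according to its declared type; every type
    // not listed explicitly is treated as a 32-bit int.
    void store_result(void* addr, BasicTypeSketch type,
                      int64_t ival, double dval, void* oop) {
      switch (type) {
        case BT_OBJECT: *(void**)addr   = oop;           break;
        case BT_LONG:   *(int64_t*)addr = ival;          break;
        case BT_FLOAT:  *(float*)addr   = (float)dval;   break;
        case BT_DOUBLE: *(double*)addr  = dval;          break;
        default:        *(int32_t*)addr = (int32_t)ival; break;  // everything else as T_INT
      }
    }
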
 320 
 321 
 322   //----------------------------------------------------------------------------------------------------
 323   // Return point for a Java call if there's an exception thrown in Java code.
 324   // The exception is caught and transformed into a pending exception stored in
 325   // JavaThread that can be tested from within the VM.
 326   //
 327   // Oexception: exception oop
 328 
 329   address generate_catch_exception() {
 330     StubCodeMark mark(this, "StubRoutines", "catch_exception");
 331 
 332     address start = __ pc();
 333     // verify that thread corresponds


 364   //
 365   // Contract with Java-level exception handler: O0 = exception
 366   //                                             O1 = throwing pc
 367 
 368   address generate_forward_exception() {
 369     StubCodeMark mark(this, "StubRoutines", "forward_exception");
 370     address start = __ pc();
 371 
 372     // Upon entry, O7 has the return address returning into Java
 373     // (interpreted or compiled) code; i.e. the return address
 374     // becomes the throwing pc.
 375 
 376     const Register& handler_reg = Gtemp;
 377 
 378     Address exception_addr(G2_thread, Thread::pending_exception_offset());
 379 
 380 #ifdef ASSERT
 381     // make sure that this code is only executed if there is a pending exception
 382     { Label L;
 383       __ ld_ptr(exception_addr, Gtemp);
 384       __ br_notnull(Gtemp, false, Assembler::pt, L);
 385       __ stop("StubRoutines::forward exception: no pending exception (1)");
 386       __ bind(L);
 387     }
 388 #endif
 389 
 390     // compute exception handler into handler_reg
 391     __ get_thread();
 392     __ ld_ptr(exception_addr, Oexception);
 393     __ verify_oop(Oexception);
 394     __ save_frame(0);             // compensates for compiler weakness
 395     __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
 396     BLOCK_COMMENT("call exception_handler_for_return_address");
 397     __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
 398     __ mov(O0, handler_reg);
 399     __ restore();                 // compensates for compiler weakness
 400 
 401     __ ld_ptr(exception_addr, Oexception);
 402     __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
 403 
 404 #ifdef ASSERT
 405     // make sure exception is set
 406     { Label L;
 407       __ br_notnull(Oexception, false, Assembler::pt, L);
 408       __ stop("StubRoutines::forward exception: no pending exception (2)");
 409       __ bind(L);
 410     }
 411 #endif
 412     // jump to exception handler
 413     __ jmp(handler_reg, 0);
 414     // clear pending exception
 415     __ delayed()->st_ptr(G0, exception_addr);
 416 
 417     return start;
 418   }
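
In outline, generate_forward_exception turns the return address in O7 into the throwing pc, asks SharedRuntime::exception_handler_for_return_address for the matching handler, clears the thread's pending-exception field in the delay slot of the final jump, and arrives at the handler with O0 = exception and O1 = throwing pc. A heavily simplified sketch of that flow; ThreadSketch, HandlerLookup and Dispatch are made-up types standing in for the VM's, not real HotSpot interfaces:

    // Hypothetical types; the real stub works on JavaThread and machine registers.
    struct ThreadSketch { void* pending_exception; };
    typedef void* (*HandlerLookup)(ThreadSketch*, void* return_pc);

    struct Dispatch { void* handler_pc; void* exception; void* throwing_pc; };

    Dispatch forward_exception_sketch(ThreadSketch* thread, void* return_pc,
                                      HandlerLookup lookup) {
      Dispatch d;
      d.throwing_pc = return_pc;                 // the return address becomes the throwing pc
      d.handler_pc  = lookup(thread, return_pc); // e.g. exception_handler_for_return_address
      d.exception   = thread->pending_exception;
      thread->pending_exception = nullptr;       // cleared in the jump's delay slot above
      return d;                                  // caller "jumps" to handler_pc with these values
    }
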
 419 
 420 
 421   //------------------------------------------------------------------------------------------------------------------------
 422   // Continuation point for throwing of implicit exceptions that are not handled in
 423   // the current activation. Fabricates an exception oop and initiates normal
 424   // exception dispatching in this frame. Only callee-saved registers are preserved
 425   // (through the normal register window / RegisterMap handling).
 426   // If the compiler needs all registers to be preserved between the fault
 427   // point and the exception handler then it must assume responsibility for that in


 481     if (arg2 != noreg) {
 482       __ mov(arg2, O2);
 483     }
 484     // do the call
 485     BLOCK_COMMENT("call runtime_entry");
 486     __ call(runtime_entry, relocInfo::runtime_call_type);
 487     if (!VerifyThread)
 488       __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
 489     else
 490       __ delayed()->nop();             // (thread already passed)
 491     __ restore_thread(noreg);
 492     __ reset_last_Java_frame();
 493 
 494     // check for pending exceptions. use Gtemp as scratch register.
 495 #ifdef ASSERT
 496     Label L;
 497 
 498     Address exception_addr(G2_thread, Thread::pending_exception_offset());
 499     Register scratch_reg = Gtemp;
 500     __ ld_ptr(exception_addr, scratch_reg);
 501     __ br_notnull(scratch_reg, false, Assembler::pt, L);
 502     __ should_not_reach_here();
 503     __ bind(L);
 504 #endif // ASSERT
 505     BLOCK_COMMENT("call forward_exception_entry");
 506     __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
 507     // we use O7 linkage so that forward_exception_entry has the issuing PC
 508     __ delayed()->restore();
 509 
 510     RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
 511     return stub->entry_point();
 512   }
 513 
 514 #undef __
 515 #define __ _masm->
 516 
 517 
 518   // Generate a routine that sets all the registers so we
 519   // can tell if the stop routine prints them correctly.
 520   address generate_test_stop() {
 521     StubCodeMark mark(this, "StubRoutines", "test_stop");


 593     if (mark_oop_reg == noreg) {
 594       address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
 595       __ set((intptr_t)lock_ptr, lock_ptr_reg);
 596     } else {
 597       assert(scratch_reg != noreg, "just checking");
 598       address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
 599       __ set((intptr_t)lock_ptr, lock_ptr_reg);
 600       __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
 601       __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
 602     }
 603   }
 604 
 605   void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
 606 
 607     get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
 608     __ set(StubRoutines::Sparc::locked, lock_reg);
 609     // Initialize yield counter
 610     __ mov(G0,yield_reg);
 611 
 612     __ BIND(retry);
 613     __ cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dontyield);
 614 
 615     // This code can only be called from inside the VM, this
 616     // stub is only invoked from Atomic::add().  We do not
 617     // want to use call_VM, because _last_java_sp and such
 618     // must already be set.
 619     //
 620     // Save the regs and make space for a C call
 621     __ save(SP, -96, SP);
 622     __ save_all_globals_into_locals();
 623     BLOCK_COMMENT("call os::naked_sleep");
 624     __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
 625     __ delayed()->nop();
 626     __ restore_globals_from_locals();
 627     __ restore();
 628     // reset the counter
 629     __ mov(G0,yield_reg);
 630 
 631     __ BIND(dontyield);
 632 
 633     // try to get lock
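
(The instructions that actually take the lock follow past this excerpt.) The prologue spins on a one-word lock, either the global atomic-memory-operation lock or a slot picked from _v8_oop_lock_cache by masking the mark word, and once the yield counter reaches V8AtomicOperationUnderLockSpinCount it calls os::naked_sleep and resets the counter before retrying. A self-contained sketch of the same spin-then-yield shape using C++11 atomics; lock_with_bounded_spin is a hypothetical name and the counter handling is simplified relative to the stub:

    #include <atomic>
    #include <thread>

    // Illustrative spin lock with a bounded spin before yielding, mirroring the
    // retry/dontyield structure above.  Not the HotSpot implementation.
    void lock_with_bounded_spin(std::atomic_flag& lock, int spin_limit) {
      int spins = 0;
      while (lock.test_and_set(std::memory_order_acquire)) {   // "try to get lock"
        if (++spins >= spin_limit) {
          std::this_thread::yield();   // stand-in for the os::naked_sleep() call above
          spins = 0;                   // reset the counter, as the stub does
        }
      }
    }
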


 653   //      dest:           O1
 654   //
 655   // Results:
 656   //
 657   //     O0: the value previously stored in dest
 658   //
 659   address generate_atomic_xchg() {
 660     StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
 661     address start = __ pc();
 662 
 663     if (UseCASForSwap) {
 664       // Use CAS instead of swap, just in case the MP hardware
 665       // prefers to work with just one kind of synch. instruction.
 666       Label retry;
 667       __ BIND(retry);
 668       __ mov(O0, O3);       // scratch copy of exchange value
 669       __ ld(O1, 0, O2);     // observe the previous value
 670       // try to replace O2 with O3
 671       __ cas_under_lock(O1, O2, O3,
 672       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
 673       __ cmp_and_br(O2, O3, Assembler::notEqual, false, Assembler::pn, retry);
 674 
 675       __ retl(false);
 676       __ delayed()->mov(O2, O0);  // report previous value to caller
 677 
 678     } else {
 679       if (VM_Version::v9_instructions_work()) {
 680         __ retl(false);
 681         __ delayed()->swap(O1, 0, O0);
 682       } else {
 683         const Register& lock_reg = O2;
 684         const Register& lock_ptr_reg = O3;
 685         const Register& yield_reg = O4;
 686 
 687         Label retry;
 688         Label dontyield;
 689 
 690         generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
 691         // got the lock, do the swap
 692         __ swap(O1, 0, O0);
 693 
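
With UseCASForSwap the stub above emulates an atomic exchange with a compare-and-swap retry loop: observe the current value, try to CAS in the new one, and retry if another thread got there first (cas_under_lock degenerates to a software lock on V8). The same idea in portable C++, offered as an illustration of the technique rather than the stub itself:

    #include <atomic>
    #include <cstdint>

    // Atomic exchange built from compare-and-swap, mirroring the retry loop above.
    int32_t xchg_via_cas(std::atomic<int32_t>& dest, int32_t exchange_value) {
      int32_t observed = dest.load();
      // compare_exchange_weak refreshes 'observed' with the current value on
      // failure, so the loop keeps retrying until no other thread intervenes.
      while (!dest.compare_exchange_weak(observed, exchange_value)) {
        // retry
      }
      return observed;   // the value previously stored in dest (reported in O0 above)
    }
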


 775   //
 776   // Results:
 777   //
 778   //     O0: the new value stored in dest
 779   //
 780   // Overwrites (v9): O3
 781   // Overwrites (v8): O3,O4,O5
 782   //
 783   address generate_atomic_add() {
 784     StubCodeMark mark(this, "StubRoutines", "atomic_add");
 785     address start = __ pc();
 786     __ BIND(_atomic_add_stub);
 787 
 788     if (VM_Version::v9_instructions_work()) {
 789       Label(retry);
 790       __ BIND(retry);
 791 
 792       __ lduw(O1, 0, O2);
 793       __ add(O0, O2, O3);
 794       __ cas(O1, O2, O3);
 795       __ cmp_and_br(O2, O3, Assembler::notEqual, false, Assembler::pn, retry);
 796       __ retl(false);
 797       __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
 798     } else {
 799       const Register& lock_reg = O2;
 800       const Register& lock_ptr_reg = O3;
 801       const Register& value_reg = O4;
 802       const Register& yield_reg = O5;
 803 
 804       Label(retry);
 805       Label(dontyield);
 806 
 807       generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
 808       // got lock, do the increment
 809       __ ld(O1, 0, value_reg);
 810       __ add(O0, value_reg, value_reg);
 811       __ st(value_reg, O1, 0);
 812 
 813       // %%% only for RMO and PSO
 814       __ membar(Assembler::StoreStore);
 815 
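
generate_atomic_add uses the same retry shape for fetch-and-add on V9 (lduw, add, cas, branch back on mismatch), while the V8 path performs the add under the spin lock and issues a StoreStore membar for the RMO/PSO memory models. A portable sketch of the V9 loop, again only an illustration:

    #include <atomic>
    #include <cstdint>

    // Add-and-return-new-value built from compare-and-swap, mirroring the
    // lduw/add/cas/retry sequence above (O0 reports the new value).
    int32_t add_via_cas(std::atomic<int32_t>& dest, int32_t add_value) {
      int32_t observed = dest.load();
      while (!dest.compare_exchange_weak(observed, observed + add_value)) {
        // 'observed' was refreshed by the failed CAS; recompute and retry
      }
      return observed + add_value;   // the new value stored in dest
    }
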


1343     if (!aligned)
1344 #endif
1345     {
1346       // Copy with shift 16 bytes per iteration if arrays do not have
1347       // the same alignment mod 8, otherwise fall through to the next
1348       // code for aligned copy.
1349       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1350       // Also jump over aligned copy after the copy with shift completed.
1351 
1352       copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1353     }
1354 
1355     // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1356       __ and3(count, 7, G4); // Save count
1357       __ srl(count, 3, count);
1358      generate_disjoint_long_copy_core(aligned);
1359       __ mov(G4, count);     // Restore count
1360 
1361     // copy trailing bytes
1362     __ BIND(L_copy_byte);
1363       __ br_zero(count, L_exit);
1364       __ align(OptoLoopAlignment);
1365     __ BIND(L_copy_byte_loop);
1366       __ ldub(from, offset, O3);
1367       __ deccc(count);
1368       __ stb(O3, to, offset);
1369       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1370       __ delayed()->inc(offset);
1371 
1372     __ BIND(L_exit);
1373       // O3, O4 are used as temp registers
1374       inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1375       __ retl();
1376       __ delayed()->mov(G0, O0); // return 0
1377     return start;
1378   }
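
copy_16_bytes_forward_with_shift, used above when the two arrays disagree in alignment mod 8, keeps every load and store aligned by reading whole 8-byte words from the source and reassembling each misaligned group of bytes with shifts and an OR before storing it. A compilable sketch of that core idea, assuming little-endian byte order (SPARC is big-endian, so the stub mirrors the shift directions) and omitting the head/tail handling and the 16-bytes-per-iteration unrolling; the function name is illustrative:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Copy 'nwords' 8-byte groups that start 'ofs' bytes (1..7) past an 8-byte
    // boundary in the source, using only aligned 8-byte loads.  Needs nwords + 1
    // readable words at src_aligned.
    static void copy_words_with_shift(uint64_t* dst, const uint64_t* src_aligned,
                                      size_t nwords, unsigned ofs) {
      assert(ofs > 0 && ofs < 8);
      uint64_t prev = src_aligned[0];
      for (size_t i = 0; i < nwords; i++) {
        uint64_t cur = src_aligned[i + 1];
        dst[i] = (prev >> (8 * ofs)) | (cur << (8 * (8 - ofs)));  // stitch two words
        prev = cur;
      }
    }

    int main() {
      uint64_t src[5];
      unsigned char* b = (unsigned char*)src;
      for (int i = 0; i < 40; i++) b[i] = (unsigned char)i;
      uint64_t dst[4];
      copy_words_with_shift(dst, src, 4, 3);      // copy 32 bytes starting at offset 3
      assert(memcmp(dst, b + 3, 32) == 0);
      return 0;
    }
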
1379 
1380   //
1381   //  Generate stub for conjoint byte copy.  If "aligned" is true, the
1382   //  "from" and "to" addresses are assumed to be heapword aligned.
1383   //


1454       // Also jump over aligned copy after the copy with shift completed.
1455 
1456       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1457                                         L_aligned_copy, L_copy_byte);
1458     }
1459     // copy 16 elements (16 bytes) at a time
1460       __ align(OptoLoopAlignment);
1461     __ BIND(L_aligned_copy);
1462       __ dec(end_from, 16);
1463       __ ldx(end_from, 8, O3);
1464       __ ldx(end_from, 0, O4);
1465       __ dec(end_to, 16);
1466       __ deccc(count, 16);
1467       __ stx(O3, end_to, 8);
1468       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1469       __ delayed()->stx(O4, end_to, 0);
1470       __ inc(count, 16);
1471 
1472     // copy 1 element (1 byte) at a time
1473     __ BIND(L_copy_byte);
1474       __ br_zero(count, L_exit);
1475       __ align(OptoLoopAlignment);
1476     __ BIND(L_copy_byte_loop);
1477       __ dec(end_from);
1478       __ dec(end_to);
1479       __ ldub(end_from, 0, O4);
1480       __ deccc(count);
1481       __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1482       __ delayed()->stb(O4, end_to, 0);
1483 
1484     __ BIND(L_exit);
1485     // O3, O4 are used as temp registers
1486     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1487     __ retl();
1488     __ delayed()->mov(G0, O0); // return 0
1489     return start;
1490   }
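
The conjoint stubs copy from the high addresses downward (dec end_from/end_to) so that an overlapping destination byte is written only after its source byte has been read; the disjoint stubs earlier on this page can safely copy forward. A minimal sketch of that direction choice, essentially what memmove does for a forward-overlapping pair:

    #include <cstddef>

    // Backward byte copy for overlapping regions, mirroring the conjoint stubs:
    // start at the end and walk down so an overlapping destination never
    // clobbers source bytes that have not been read yet.
    void conjoint_byte_copy(const unsigned char* from, unsigned char* to, size_t count) {
      const unsigned char* end_from = from + count;
      unsigned char* end_to = to + count;
      while (count-- > 0) {
        *--end_to = *--end_from;
      }
    }
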
1491 
1492   //
1493   //  Generate stub for disjoint short copy.  If "aligned" is true, the
1494   //  "from" and "to" addresses are assumed to be heapword aligned.


1571     if (!aligned)
1572 #endif
1573     {
1574       // Copy with shift 16 bytes per iteration if arrays do not have
1575       // the same alignment mod 8, otherwise fall through to the next
1576       // code for aligned copy.
1577       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1578       // Also jump over aligned copy after the copy with shift completed.
1579 
1580       copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1581     }
1582 
1583     // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1584       __ and3(count, 3, G4); // Save
1585       __ srl(count, 2, count);
1586      generate_disjoint_long_copy_core(aligned);
1587       __ mov(G4, count); // restore
1588 
1589     // copy 1 element at a time
1590     __ BIND(L_copy_2_bytes);
1591       __ br_zero(count, L_exit);
1592       __ align(OptoLoopAlignment);
1593     __ BIND(L_copy_2_bytes_loop);
1594       __ lduh(from, offset, O3);
1595       __ deccc(count);
1596       __ sth(O3, to, offset);
1597       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1598       __ delayed()->inc(offset, 2);
1599 
1600     __ BIND(L_exit);
1601       // O3, O4 are used as temp registers
1602       inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1603       __ retl();
1604       __ delayed()->mov(G0, O0); // return 0
1605     return start;
1606   }
1607 
1608   //
1609   //  Generate stub for disjoint short fill.  If "aligned" is true, the
1610   //  "to" address is assumed to be heapword aligned.
1611   //


1916       // Also jump over aligned copy after the copy with shift completed.
1917 
1918       copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1919                                         L_aligned_copy, L_copy_2_bytes);
1920     }
1921     // copy 8 elements (16 bytes) at a time
1922       __ align(OptoLoopAlignment);
1923     __ BIND(L_aligned_copy);
1924       __ dec(end_from, 16);
1925       __ ldx(end_from, 8, O3);
1926       __ ldx(end_from, 0, O4);
1927       __ dec(end_to, 16);
1928       __ deccc(count, 8);
1929       __ stx(O3, end_to, 8);
1930       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1931       __ delayed()->stx(O4, end_to, 0);
1932       __ inc(count, 8);
1933 
1934     // copy 1 element (2 bytes) at a time
1935     __ BIND(L_copy_2_bytes);
1936       __ br_zero(count, L_exit);
1937     __ BIND(L_copy_2_bytes_loop);
1938       __ dec(end_from, 2);
1939       __ dec(end_to, 2);
1940       __ lduh(end_from, 0, O4);
1941       __ deccc(count);
1942       __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1943       __ delayed()->sth(O4, end_to, 0);
1944 
1945     __ BIND(L_exit);
1946     // O3, O4 are used as temp registers
1947     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1948     __ retl();
1949     __ delayed()->mov(G0, O0); // return 0
1950     return start;
1951   }
1952 
1953   //
1954   //  Generate core code for disjoint int copy (and oop copy on 32-bit).
1955   //  If "aligned" is true, the "from" and "to" addresses are assumed
1956   //  to be heapword aligned.


2029       __ sllx(O4, 32, O4);
2030       __ srlx(G4, 32, G3);
2031       __ bset(G3, O4);
2032       __ stx(O4, to, -8);
2033       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2034       __ delayed()->mov(G4, O3);
2035 
2036       __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2037       __ delayed()->inc(count, 4); // restore 'count'
2038 
2039     __ BIND(L_aligned_copy);
2040     }
2041     // copy 4 elements (16 bytes) at a time
2042       __ and3(count, 1, G4); // Save
2043       __ srl(count, 1, count);
2044      generate_disjoint_long_copy_core(aligned);
2045       __ mov(G4, count);     // Restore
2046 
2047     // copy 1 element at a time
2048     __ BIND(L_copy_4_bytes);
2049       __ br_zero(count, L_exit);
2050     __ BIND(L_copy_4_bytes_loop);
2051       __ ld(from, offset, O3);
2052       __ deccc(count);
2053       __ st(O3, to, offset);
2054       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2055       __ delayed()->inc(offset, 4);
2056     __ BIND(L_exit);
2057   }
2058 
2059   //
2060   //  Generate stub for disjoint int copy.  If "aligned" is true, the
2061   //  "from" and "to" addresses are assumed to be heapword aligned.
2062   //
2063   // Arguments for generated stub:
2064   //      from:  O0
2065   //      to:    O1
2066   //      count: O2 treated as signed
2067   //
2068   address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2069     __ align(CodeEntryAlignment);


2161       __ delayed()->mov(O5, O3);
2162 
2163       __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2164       __ delayed()->inc(count, 4);
2165 
2166     // copy 4 elements (16 bytes) at a time
2167       __ align(OptoLoopAlignment);
2168     __ BIND(L_aligned_copy);
2169       __ dec(end_from, 16);
2170       __ ldx(end_from, 8, O3);
2171       __ ldx(end_from, 0, O4);
2172       __ dec(end_to, 16);
2173       __ deccc(count, 4);
2174       __ stx(O3, end_to, 8);
2175       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2176       __ delayed()->stx(O4, end_to, 0);
2177       __ inc(count, 4);
2178 
2179     // copy 1 element (4 bytes) at a time
2180     __ BIND(L_copy_4_bytes);
2181       __ br_zero(count, L_exit);
2182     __ BIND(L_copy_4_bytes_loop);
2183       __ dec(end_from, 4);
2184       __ dec(end_to, 4);
2185       __ ld(end_from, 0, O4);
2186       __ deccc(count);
2187       __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2188       __ delayed()->st(O4, end_to, 0);
2189     __ BIND(L_exit);
2190   }
2191 
2192   //
2193   //  Generate stub for conjoint int copy.  If "aligned" is true, the
2194   //  "from" and "to" addresses are assumed to be heapword aligned.
2195   //
2196   // Arguments for generated stub:
2197   //      from:  O0
2198   //      to:    O1
2199   //      count: O2 treated as signed
2200   //
2201   address generate_conjoint_int_copy(bool aligned, address nooverlap_target,


2543                            Register temp,
2544                            Label& L_success) {
2545     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2546 
2547     BLOCK_COMMENT("type_check:");
2548 
2549     Label L_miss, L_pop_to_miss;
2550 
2551     assert_clean_int(super_check_offset, temp);
2552 
2553     __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2554                                      &L_success, &L_miss, NULL,
2555                                      super_check_offset);
2556 
2557     BLOCK_COMMENT("type_check_slow_path:");
2558     __ save_frame(0);
2559     __ check_klass_subtype_slow_path(sub_klass->after_save(),
2560                                      super_klass->after_save(),
2561                                      L0, L1, L2, L4,
2562                                      NULL, &L_pop_to_miss);
2563     __ ba(L_success, false);
2564     __ delayed()->restore();
2565 
2566     __ bind(L_pop_to_miss);
2567     __ restore();
2568 
2569     // Fall through on failure!
2570     __ BIND(L_miss);
2571   }
2572 
2573 
2574   //  Generate stub for checked oop copy.
2575   //
2576   // Arguments for generated stub:
2577   //      from:  O0
2578   //      to:    O1
2579   //      count: O2 treated as signed
2580   //      ckoff: O3 (super_check_offset)
2581   //      ckval: O4 (super_klass)
2582   //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
2583   //


2640     __ delayed()->set(0, O0);           // return 0 on (trivial) success
2641 
2642     // ======== begin loop ========
2643     // (Loop is rotated; its entry is load_element.)
2644     // Loop variables:
2645     //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2646     //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2647     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2648     __ align(OptoLoopAlignment);
2649 
2650     __ BIND(store_element);
2651     __ deccc(G1_remain);                // decrement the count
2652     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2653     __ inc(O5_offset, heapOopSize);     // step to next offset
2654     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
 2655     __ delayed()->set(0, O0);           // return 0 on success
2656 
2657     // ======== loop entry is here ========
2658     __ BIND(load_element);
2659     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
2660     __ br_null(G3_oop, true, Assembler::pt, store_element);
2661 
2662     __ load_klass(G3_oop, G4_klass); // query the object klass
2663 
2664     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2665                         // branch to this on success:
2666                         store_element);
2667     // ======== end loop ========
2668 
2669     // It was a real error; we must depend on the caller to finish the job.
2670     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2671     // Emit GC store barriers for the oops we have copied (O2 minus G1),
2672     // and report their number to the caller.
2673     __ BIND(fail);
2674     __ subcc(O2_count, G1_remain, O2_count);
2675     __ brx(Assembler::zero, false, Assembler::pt, done);
2676     __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
2677 
2678     __ BIND(do_card_marks);
2679     gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
2680 
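
The rotated loop above copies oops one element at a time, skipping the subtype check for NULLs, and stops at the first element that fails the check; card marks are then emitted only for the prefix that was actually copied, and the stub reports (-1 ^ K), where K is the number of elements transferred (0 means complete success). A small sketch of that return-value contract with an arbitrary per-element predicate; checked_oop_copy_sketch and ElementCheck are hypothetical names, not HotSpot code:

    #include <cstddef>

    typedef bool (*ElementCheck)(const void* elem);

    // Element-wise checked copy: returns 0 on full success and ~K (== -1 ^ K)
    // once a check fails after K elements were copied, matching the checkcast
    // arraycopy contract described above.  NULL elements are copied without a
    // check, as in the loop above.
    int checked_oop_copy_sketch(void* const* from, void** to, size_t count,
                                ElementCheck check) {
      for (size_t k = 0; k < count; k++) {
        void* elem = from[k];
        if (elem != nullptr && !check(elem)) {
          return ~(int)k;            // partial transfer: K elements already copied
        }
        to[k] = elem;
      }
      return 0;                      // trivial or complete success
    }
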


2862     __ delayed()->tst(dst_pos);
2863     __ br(Assembler::negative, false, Assembler::pn, L_failed);
2864 
2865     //  if (length < 0) return -1;
2866     __ delayed()->tst(length);
2867     __ br(Assembler::negative, false, Assembler::pn, L_failed);
2868 
2869     BLOCK_COMMENT("arraycopy argument klass checks");
2870     //  get src->klass()
2871     if (UseCompressedOops) {
2872       __ delayed()->nop(); // ??? not good
2873       __ load_klass(src, G3_src_klass);
2874     } else {
2875       __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2876     }
2877 
2878 #ifdef ASSERT
2879     //  assert(src->klass() != NULL);
2880     BLOCK_COMMENT("assert klasses not null");
2881     { Label L_a, L_b;
2882       __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2883       __ bind(L_a);
2884       __ stop("broken null klass");
2885       __ bind(L_b);
2886       __ load_klass(dst, G4_dst_klass);
2887       __ br_null(G4_dst_klass, false, Assembler::pn, L_a, false); // this would be broken also
2888       __ delayed()->mov(G0, G4_dst_klass);      // scribble the temp
2889       BLOCK_COMMENT("assert done");
2890     }
2891 #endif
2892 
2893     // Load layout helper
2894     //
2895     //  |array_tag|     | header_size | element_type |     |log2_element_size|
2896     // 32        30    24            16              8     2                 0
2897     //
2898     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2899     //
2900 
2901     int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2902                     Klass::layout_helper_offset_in_bytes();
2903 
2904     // Load 32-bits signed value. Use br() instruction with it to check icc.
2905     __ lduw(G3_src_klass, lh_offset, G5_lh);
2906 
2907     if (UseCompressedOops) {
2908       __ load_klass(dst, G4_dst_klass);
2909     }
2910     // Handle objArrays completely differently...
2911     juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2912     __ set(objArray_lh, O5_temp);
2913     __ cmp(G5_lh,       O5_temp);
2914     __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2915     if (UseCompressedOops) {
2916       __ delayed()->nop();
2917     } else {
2918       __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2919     }
2920 
2921     //  if (src->klass() != dst->klass()) return -1;
2922     __ cmp_and_brx(G3_src_klass, G4_dst_klass, Assembler::notEqual, false, Assembler::pn, L_failed);
2923 
2924     //  if (!src->is_Array()) return -1;
2925     __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2926     __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2927 
2928     // At this point, it is known to be a typeArray (array_tag 0x3).
2929 #ifdef ASSERT
2930     __ delayed()->nop();
2931     { Label L;
2932       jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2933       __ set(lh_prim_tag_in_place, O5_temp);
2934       __ cmp(G5_lh,                O5_temp);
2935       __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2936       __ delayed()->nop();
2937       __ stop("must be a primitive array");
2938       __ bind(L);
2939     }
2940 #else
2941     __ delayed();                               // match next insn to prev branch
2942 #endif
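
The layout helper packs everything the generic arraycopy needs into one 32-bit word: the array tag in the top two bits (typeArray = 0x3, objArray = 0x2, so any array value is negative, which is what the comparison against _lh_neutral_value above relies on), the header size in bytes, the element type, and log2 of the element size; the dispatch further down scales src_pos/dst_pos by that log2 size and picks the byte/short/int/long copy loop. A sketch of the decode using the bit positions from the diagram above; the struct, function and field widths are illustrative, not the Klass constants:

    #include <cstdint>

    // Illustrative decode of a layout-helper word, following the diagram above:
    // top two bits = array tag, next byte (below an unused gap) = header size in
    // bytes, next byte = element type, low bits = log2(element size).
    struct LayoutHelperFields {
      unsigned tag;          // 0x3 = typeArray, 0x2 = objArray
      unsigned header_size;  // bytes from the start of the object to element 0
      unsigned elem_type;    // BasicType of the elements
      unsigned log2_esize;   // log2 of the element size in bytes
    };

    LayoutHelperFields decode_layout_helper(int32_t lh) {
      LayoutHelperFields f;
      f.tag         = ((uint32_t)lh >> 30) & 0x3;
      f.header_size = ((uint32_t)lh >> 16) & 0xff;
      f.elem_type   = ((uint32_t)lh >>  8) & 0xff;
      f.log2_esize  =  (uint32_t)lh        & 0xff;   // only a few low bits are used
      return f;
    }

    // The element-size dispatch below amounts to:
    //   from = src + (src_pos << log2_esize);   to = dst + (dst_pos << log2_esize);
    // followed by a jump to the byte/short/int/long copy loop selected by log2_esize.
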


2970     BLOCK_COMMENT("scale indexes to element size");
2971     __ sll_ptr(src_pos, G3_elsize, src_pos);
2972     __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2973     __ add(src, src_pos, from);       // src_addr
2974     __ add(dst, dst_pos, to);         // dst_addr
2975 
2976     BLOCK_COMMENT("choose copy loop based on element size");
2977     __ cmp(G3_elsize, 0);
2978     __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2979     __ delayed()->signx(length, count); // length
2980 
2981     __ cmp(G3_elsize, LogBytesPerShort);
2982     __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2983     __ delayed()->signx(length, count); // length
2984 
2985     __ cmp(G3_elsize, LogBytesPerInt);
2986     __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2987     __ delayed()->signx(length, count); // length
2988 #ifdef ASSERT
2989     { Label L;
2990       __ cmp_and_br(G3_elsize, LogBytesPerLong, Assembler::equal, false, Assembler::pt, L);
2991       __ stop("must be long copy, but elsize is wrong");
2992       __ bind(L);
2993     }
2994 #endif
2995     __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2996     __ delayed()->signx(length, count); // length
2997 
2998     // objArrayKlass
2999   __ BIND(L_objArray);
3000     // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3001 
3002     Label L_plain_copy, L_checkcast_copy;
3003     //  test array classes for subtyping
3004     __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
3005     __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3006     __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3007 
3008     // Identically typed arrays can be copied without element-wise checks.
3009     arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3010                            O5_temp, G5_lh, L_failed);




 133     //       the code in frame::entry_frame_call_wrapper()
 134 
 135     const Argument link           = Argument(0, false); // used only for GC
 136     const Argument result         = Argument(1, false);
 137     const Argument result_type    = Argument(2, false);
 138     const Argument method         = Argument(3, false);
 139     const Argument entry_point    = Argument(4, false);
 140     const Argument parameters     = Argument(5, false);
 141     const Argument parameter_size = Argument(6, false);
 142     const Argument thread         = Argument(7, false);
 143 
 144     // setup thread register
 145     __ ld_ptr(thread.as_address(), G2_thread);
 146     __ reinit_heapbase();
 147 
 148 #ifdef ASSERT
 149     // make sure we have no pending exceptions
 150     { const Register t = G3_scratch;
 151       Label L;
 152       __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
 153       __ br_null_short(t, Assembler::pt, L);
 154       __ stop("StubRoutines::call_stub: entered with pending exception");
 155       __ bind(L);
 156     }
 157 #endif
 158 
 159     // create activation frame & allocate space for parameters
 160     { const Register t = G3_scratch;
 161       __ ld_ptr(parameter_size.as_address(), t);                // get parameter size (in words)
 162       __ add(t, frame::memory_parameter_word_sp_offset, t);     // add space for save area (in words)
 163       __ round_to(t, WordsPerLong);                             // make sure it is multiple of 2 (in words)
 164       __ sll(t, Interpreter::logStackElementSize, t);           // compute number of bytes
 165       __ neg(t);                                                // negate so it can be used with save
 166       __ save(SP, t, SP);                                       // setup new frame
 167     }
 168 
 169     // +---------------+ <--- sp + 0
 170     // |               |
 171     // . reg save area .
 172     // |               |
 173     // +---------------+ <--- sp + 0x40


 189     // . extra 7 slots .
 190     // |               |
 191     // +---------------+ <--- fp + 0x5c
 192     // |  param. size  |
 193     // +---------------+ <--- fp + 0x60
 194     // |    thread     |
 195     // +---------------+
 196     // |               |
 197 
 198     // pass parameters if any
 199     BLOCK_COMMENT("pass parameters if any");
 200     { const Register src = parameters.as_in().as_register();
 201       const Register dst = Lentry_args;
 202       const Register tmp = G3_scratch;
 203       const Register cnt = G4_scratch;
 204 
 205       // test if any parameters & setup of Lentry_args
 206       Label exit;
 207       __ ld_ptr(parameter_size.as_in().as_address(), cnt);      // parameter counter
 208       __ add( FP, STACK_BIAS, dst );
 209       __ cmp_zero_and_br(Assembler::zero, cnt, exit);

 210       __ delayed()->sub(dst, BytesPerWord, dst);                 // setup Lentry_args
 211 
 212       // copy parameters if any
 213       Label loop;
 214       __ BIND(loop);
 215       // Store parameter value
 216       __ ld_ptr(src, 0, tmp);
 217       __ add(src, BytesPerWord, src);
 218       __ st_ptr(tmp, dst, 0);
 219       __ deccc(cnt);
 220       __ br(Assembler::greater, false, Assembler::pt, loop);
 221       __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
 222 
 223       // done
 224       __ BIND(exit);
 225     }
 226 
 227     // setup parameters, method & call Java function
 228 #ifdef ASSERT
 229     // layout_activation_impl checks its notion of saved SP against


 263     // store result depending on type
 264     // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
 265     //  is treated as T_INT)
 266     { const Register addr = result     .as_in().as_register();
 267       const Register type = result_type.as_in().as_register();
 268       Label is_long, is_float, is_double, is_object, exit;
 269       __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
 270       __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
 271       __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
 272       __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
 273       __ delayed()->nop();
 274 
 275       // store int result
 276       __ st(O0, addr, G0);
 277 
 278       __ BIND(exit);
 279       __ ret();
 280       __ delayed()->restore();
 281 
 282       __ BIND(is_object);
 283       __ ba(exit);
 284       __ delayed()->st_ptr(O0, addr, G0);
 285 
 286       __ BIND(is_float);
 287       __ ba(exit);
 288       __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
 289 
 290       __ BIND(is_double);
 291       __ ba(exit);
 292       __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
 293 
 294       __ BIND(is_long);
 295 #ifdef _LP64
 296       __ ba(exit);
 297       __ delayed()->st_long(O0, addr, G0);      // store entire long
 298 #else
 299 #if defined(COMPILER2)
 300   // All return values are where we want them, except for Longs.  C2 returns
 301   // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
 302   // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
 303   // build we simply always use G1.
 304   // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
 305   // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
 306   // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
 307 
 308       __ ba(exit);
 309       __ delayed()->stx(G1, addr, G0);  // store entire long
 310 #else
 311       __ st(O1, addr, BytesPerInt);
 312       __ ba(exit);
 313       __ delayed()->st(O0, addr, G0);
 314 #endif /* COMPILER2 */
 315 #endif /* _LP64 */
 316      }
 317      return start;
 318   }
 319 
 320 
 321   //----------------------------------------------------------------------------------------------------
 322   // Return point for a Java call if there's an exception thrown in Java code.
 323   // The exception is caught and transformed into a pending exception stored in
 324   // JavaThread that can be tested from within the VM.
 325   //
 326   // Oexception: exception oop
 327 
 328   address generate_catch_exception() {
 329     StubCodeMark mark(this, "StubRoutines", "catch_exception");
 330 
 331     address start = __ pc();
 332     // verify that thread corresponds


 363   //
 364   // Contract with Java-level exception handler: O0 = exception
 365   //                                             O1 = throwing pc
 366 
 367   address generate_forward_exception() {
 368     StubCodeMark mark(this, "StubRoutines", "forward_exception");
 369     address start = __ pc();
 370 
 371     // Upon entry, O7 has the return address returning into Java
 372     // (interpreted or compiled) code; i.e. the return address
 373     // becomes the throwing pc.
 374 
 375     const Register& handler_reg = Gtemp;
 376 
 377     Address exception_addr(G2_thread, Thread::pending_exception_offset());
 378 
 379 #ifdef ASSERT
 380     // make sure that this code is only executed if there is a pending exception
 381     { Label L;
 382       __ ld_ptr(exception_addr, Gtemp);
 383       __ br_notnull_short(Gtemp, Assembler::pt, L);
 384       __ stop("StubRoutines::forward exception: no pending exception (1)");
 385       __ bind(L);
 386     }
 387 #endif
 388 
 389     // compute exception handler into handler_reg
 390     __ get_thread();
 391     __ ld_ptr(exception_addr, Oexception);
 392     __ verify_oop(Oexception);
 393     __ save_frame(0);             // compensates for compiler weakness
 394     __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
 395     BLOCK_COMMENT("call exception_handler_for_return_address");
 396     __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
 397     __ mov(O0, handler_reg);
 398     __ restore();                 // compensates for compiler weakness
 399 
 400     __ ld_ptr(exception_addr, Oexception);
 401     __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
 402 
 403 #ifdef ASSERT
 404     // make sure exception is set
 405     { Label L;
 406       __ br_notnull_short(Oexception, Assembler::pt, L);
 407       __ stop("StubRoutines::forward exception: no pending exception (2)");
 408       __ bind(L);
 409     }
 410 #endif
 411     // jump to exception handler
 412     __ jmp(handler_reg, 0);
 413     // clear pending exception
 414     __ delayed()->st_ptr(G0, exception_addr);
 415 
 416     return start;
 417   }
 418 
 419 
 420   //------------------------------------------------------------------------------------------------------------------------
 421   // Continuation point for throwing of implicit exceptions that are not handled in
 422   // the current activation. Fabricates an exception oop and initiates normal
 423   // exception dispatching in this frame. Only callee-saved registers are preserved
 424   // (through the normal register window / RegisterMap handling).
 425   // If the compiler needs all registers to be preserved between the fault
 426   // point and the exception handler then it must assume responsibility for that in


 480     if (arg2 != noreg) {
 481       __ mov(arg2, O2);
 482     }
 483     // do the call
 484     BLOCK_COMMENT("call runtime_entry");
 485     __ call(runtime_entry, relocInfo::runtime_call_type);
 486     if (!VerifyThread)
 487       __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
 488     else
 489       __ delayed()->nop();             // (thread already passed)
 490     __ restore_thread(noreg);
 491     __ reset_last_Java_frame();
 492 
 493     // check for pending exceptions. use Gtemp as scratch register.
 494 #ifdef ASSERT
 495     Label L;
 496 
 497     Address exception_addr(G2_thread, Thread::pending_exception_offset());
 498     Register scratch_reg = Gtemp;
 499     __ ld_ptr(exception_addr, scratch_reg);
 500     __ br_notnull_short(scratch_reg, Assembler::pt, L);
 501     __ should_not_reach_here();
 502     __ bind(L);
 503 #endif // ASSERT
 504     BLOCK_COMMENT("call forward_exception_entry");
 505     __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
 506     // we use O7 linkage so that forward_exception_entry has the issuing PC
 507     __ delayed()->restore();
 508 
 509     RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
 510     return stub->entry_point();
 511   }
 512 
 513 #undef __
 514 #define __ _masm->
 515 
 516 
 517   // Generate a routine that sets all the registers so we
 518   // can tell if the stop routine prints them correctly.
 519   address generate_test_stop() {
 520     StubCodeMark mark(this, "StubRoutines", "test_stop");


 592     if (mark_oop_reg == noreg) {
 593       address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
 594       __ set((intptr_t)lock_ptr, lock_ptr_reg);
 595     } else {
 596       assert(scratch_reg != noreg, "just checking");
 597       address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
 598       __ set((intptr_t)lock_ptr, lock_ptr_reg);
 599       __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
 600       __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
 601     }
 602   }
 603 
 604   void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
 605 
 606     get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
 607     __ set(StubRoutines::Sparc::locked, lock_reg);
 608     // Initialize yield counter
 609     __ mov(G0,yield_reg);
 610 
 611     __ BIND(retry);
 612     __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
 613 
 614     // This code can only be called from inside the VM, this
 615     // stub is only invoked from Atomic::add().  We do not
 616     // want to use call_VM, because _last_java_sp and such
 617     // must already be set.
 618     //
 619     // Save the regs and make space for a C call
 620     __ save(SP, -96, SP);
 621     __ save_all_globals_into_locals();
 622     BLOCK_COMMENT("call os::naked_sleep");
 623     __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
 624     __ delayed()->nop();
 625     __ restore_globals_from_locals();
 626     __ restore();
 627     // reset the counter
 628     __ mov(G0,yield_reg);
 629 
 630     __ BIND(dontyield);
 631 
 632     // try to get lock


 652   //      dest:           O1
 653   //
 654   // Results:
 655   //
 656   //     O0: the value previously stored in dest
 657   //
 658   address generate_atomic_xchg() {
 659     StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
 660     address start = __ pc();
 661 
 662     if (UseCASForSwap) {
 663       // Use CAS instead of swap, just in case the MP hardware
 664       // prefers to work with just one kind of synch. instruction.
 665       Label retry;
 666       __ BIND(retry);
 667       __ mov(O0, O3);       // scratch copy of exchange value
 668       __ ld(O1, 0, O2);     // observe the previous value
 669       // try to replace O2 with O3
 670       __ cas_under_lock(O1, O2, O3,
 671       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
 672       __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
 673 
 674       __ retl(false);
 675       __ delayed()->mov(O2, O0);  // report previous value to caller
 676 
 677     } else {
 678       if (VM_Version::v9_instructions_work()) {
 679         __ retl(false);
 680         __ delayed()->swap(O1, 0, O0);
 681       } else {
 682         const Register& lock_reg = O2;
 683         const Register& lock_ptr_reg = O3;
 684         const Register& yield_reg = O4;
 685 
 686         Label retry;
 687         Label dontyield;
 688 
 689         generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
 690         // got the lock, do the swap
 691         __ swap(O1, 0, O0);
 692 


 774   //
 775   // Results:
 776   //
 777   //     O0: the new value stored in dest
 778   //
 779   // Overwrites (v9): O3
 780   // Overwrites (v8): O3,O4,O5
 781   //
 782   address generate_atomic_add() {
 783     StubCodeMark mark(this, "StubRoutines", "atomic_add");
 784     address start = __ pc();
 785     __ BIND(_atomic_add_stub);
 786 
 787     if (VM_Version::v9_instructions_work()) {
 788       Label(retry);
 789       __ BIND(retry);
 790 
 791       __ lduw(O1, 0, O2);
 792       __ add(O0, O2, O3);
 793       __ cas(O1, O2, O3);
 794       __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
 795       __ retl(false);
 796       __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
 797     } else {
 798       const Register& lock_reg = O2;
 799       const Register& lock_ptr_reg = O3;
 800       const Register& value_reg = O4;
 801       const Register& yield_reg = O5;
 802 
 803       Label(retry);
 804       Label(dontyield);
 805 
 806       generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
 807       // got lock, do the increment
 808       __ ld(O1, 0, value_reg);
 809       __ add(O0, value_reg, value_reg);
 810       __ st(value_reg, O1, 0);
 811 
 812       // %%% only for RMO and PSO
 813       __ membar(Assembler::StoreStore);
 814 


1342     if (!aligned)
1343 #endif
1344     {
1345       // Copy with shift 16 bytes per iteration if arrays do not have
1346       // the same alignment mod 8, otherwise fall through to the next
1347       // code for aligned copy.
1348       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1349       // Also jump over aligned copy after the copy with shift completed.
1350 
1351       copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1352     }
1353 
1354     // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1355       __ and3(count, 7, G4); // Save count
1356       __ srl(count, 3, count);
1357      generate_disjoint_long_copy_core(aligned);
1358       __ mov(G4, count);     // Restore count
1359 
1360     // copy trailing bytes
1361     __ BIND(L_copy_byte);
1362       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1363       __ align(OptoLoopAlignment);
1364     __ BIND(L_copy_byte_loop);
1365       __ ldub(from, offset, O3);
1366       __ deccc(count);
1367       __ stb(O3, to, offset);
1368       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1369       __ delayed()->inc(offset);
1370 
1371     __ BIND(L_exit);
1372       // O3, O4 are used as temp registers
1373       inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1374       __ retl();
1375       __ delayed()->mov(G0, O0); // return 0
1376     return start;
1377   }
1378 
1379   //
1380   //  Generate stub for conjoint byte copy.  If "aligned" is true, the
1381   //  "from" and "to" addresses are assumed to be heapword aligned.
1382   //


1453       // Also jump over aligned copy after the copy with shift completed.
1454 
1455       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1456                                         L_aligned_copy, L_copy_byte);
1457     }
1458     // copy 16 elements (16 bytes) at a time
1459       __ align(OptoLoopAlignment);
1460     __ BIND(L_aligned_copy);
1461       __ dec(end_from, 16);
1462       __ ldx(end_from, 8, O3);
1463       __ ldx(end_from, 0, O4);
1464       __ dec(end_to, 16);
1465       __ deccc(count, 16);
1466       __ stx(O3, end_to, 8);
1467       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1468       __ delayed()->stx(O4, end_to, 0);
1469       __ inc(count, 16);
1470 
1471     // copy 1 element (1 byte) at a time
1472     __ BIND(L_copy_byte);
1473       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1474       __ align(OptoLoopAlignment);
1475     __ BIND(L_copy_byte_loop);
1476       __ dec(end_from);
1477       __ dec(end_to);
1478       __ ldub(end_from, 0, O4);
1479       __ deccc(count);
1480       __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1481       __ delayed()->stb(O4, end_to, 0);
1482 
1483     __ BIND(L_exit);
1484     // O3, O4 are used as temp registers
1485     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1486     __ retl();
1487     __ delayed()->mov(G0, O0); // return 0
1488     return start;
1489   }
1490 
1491   //
1492   //  Generate stub for disjoint short copy.  If "aligned" is true, the
1493   //  "from" and "to" addresses are assumed to be heapword aligned.


1570     if (!aligned)
1571 #endif
1572     {
1573       // Copy with shift 16 bytes per iteration if arrays do not have
1574       // the same alignment mod 8, otherwise fall through to the next
1575       // code for aligned copy.
1576       // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1577       // Also jump over aligned copy after the copy with shift completed.
1578 
1579       copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1580     }
1581 
1582     // Both arrays are 8 bytes aligned, copy 16 bytes at a time
1583       __ and3(count, 3, G4); // Save
1584       __ srl(count, 2, count);
1585      generate_disjoint_long_copy_core(aligned);
1586       __ mov(G4, count); // restore
1587 
1588     // copy 1 element at a time
1589     __ BIND(L_copy_2_bytes);
1590       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1591       __ align(OptoLoopAlignment);
1592     __ BIND(L_copy_2_bytes_loop);
1593       __ lduh(from, offset, O3);
1594       __ deccc(count);
1595       __ sth(O3, to, offset);
1596       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1597       __ delayed()->inc(offset, 2);
1598 
1599     __ BIND(L_exit);
1600       // O3, O4 are used as temp registers
1601       inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1602       __ retl();
1603       __ delayed()->mov(G0, O0); // return 0
1604     return start;
1605   }
1606 
1607   //
1608   //  Generate stub for disjoint short fill.  If "aligned" is true, the
1609   //  "to" address is assumed to be heapword aligned.
1610   //


1915       // Also jump over aligned copy after the copy with shift completed.
1916 
1917       copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1918                                         L_aligned_copy, L_copy_2_bytes);
1919     }
1920     // copy 8 elements (16 bytes) at a time
1921       __ align(OptoLoopAlignment);
1922     __ BIND(L_aligned_copy);
1923       __ dec(end_from, 16);
1924       __ ldx(end_from, 8, O3);
1925       __ ldx(end_from, 0, O4);
1926       __ dec(end_to, 16);
1927       __ deccc(count, 8);
1928       __ stx(O3, end_to, 8);
1929       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1930       __ delayed()->stx(O4, end_to, 0);
1931       __ inc(count, 8);
1932 
1933     // copy 1 element (2 bytes) at a time
1934     __ BIND(L_copy_2_bytes);
1935       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1936     __ BIND(L_copy_2_bytes_loop);
1937       __ dec(end_from, 2);
1938       __ dec(end_to, 2);
1939       __ lduh(end_from, 0, O4);
1940       __ deccc(count);
1941       __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1942       __ delayed()->sth(O4, end_to, 0);
1943 
1944     __ BIND(L_exit);
1945     // O3, O4 are used as temp registers
1946     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1947     __ retl();
1948     __ delayed()->mov(G0, O0); // return 0
1949     return start;
1950   }
1951 
1952   //
1953   //  Generate core code for disjoint int copy (and oop copy on 32-bit).
1954   //  If "aligned" is true, the "from" and "to" addresses are assumed
1955   //  to be heapword aligned.


2028       __ sllx(O4, 32, O4);
2029       __ srlx(G4, 32, G3);
2030       __ bset(G3, O4);
2031       __ stx(O4, to, -8);
2032       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2033       __ delayed()->mov(G4, O3);
2034 
2035       __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2036       __ delayed()->inc(count, 4); // restore 'count'
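      // The loop above handles 'from'/'to' differing by 4 mod 8: each aligned
      // 8-byte store is assembled by splicing the low word of one doubleword
      // (moved up with sllx) with the high word of the following one (moved
      // down with srlx) before the stx.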
2037 
2038     __ BIND(L_aligned_copy);
2039     }
2040     // copy 4 elements (16 bytes) at a time
2041       __ and3(count, 1, G4); // Save
2042       __ srl(count, 1, count);
2043      generate_disjoint_long_copy_core(aligned);
2044       __ mov(G4, count);     // Restore
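      // For example, with count == 9 jints: G4 saves 9 & 1 == 1, the long copy
      // core moves 9 >> 1 == 4 doublewords (8 ints), and the single leftover
      // int is copied by the 4-byte tail loop below.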
2045 
2046     // copy 1 element at a time
2047     __ BIND(L_copy_4_bytes);
2048       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2049     __ BIND(L_copy_4_bytes_loop);
2050       __ ld(from, offset, O3);
2051       __ deccc(count);
2052       __ st(O3, to, offset);
2053       __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2054       __ delayed()->inc(offset, 4);
2055     __ BIND(L_exit);
2056   }
2057 
2058   //
2059   //  Generate stub for disjoint int copy.  If "aligned" is true, the
2060   //  "from" and "to" addresses are assumed to be heapword aligned.
2061   //
2062   // Arguments for generated stub:
2063   //      from:  O0
2064   //      to:    O1
2065   //      count: O2 treated as signed
2066   //
2067   address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2068     __ align(CodeEntryAlignment);


2160       __ delayed()->mov(O5, O3);
2161 
2162       __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2163       __ delayed()->inc(count, 4);
2164 
2165     // copy 4 elements (16 bytes) at a time
2166       __ align(OptoLoopAlignment);
2167     __ BIND(L_aligned_copy);
2168       __ dec(end_from, 16);
2169       __ ldx(end_from, 8, O3);
2170       __ ldx(end_from, 0, O4);
2171       __ dec(end_to, 16);
2172       __ deccc(count, 4);
2173       __ stx(O3, end_to, 8);
2174       __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2175       __ delayed()->stx(O4, end_to, 0);
2176       __ inc(count, 4);
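      // As in the short copy above: once fewer than 4 ints remain, the
      // deccc(count, 4) goes negative, the loop exits, and inc(count, 4)
      // restores the 0..3 element residue for the 4-byte tail loop below.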
2177 
2178     // copy 1 element (4 bytes) at a time
2179     __ BIND(L_copy_4_bytes);
2180       __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2181     __ BIND(L_copy_4_bytes_loop);
2182       __ dec(end_from, 4);
2183       __ dec(end_to, 4);
2184       __ ld(end_from, 0, O4);
2185       __ deccc(count);
2186       __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2187       __ delayed()->st(O4, end_to, 0);
2188     __ BIND(L_exit);
2189   }
2190 
2191   //
2192   //  Generate stub for conjoint int copy.  If "aligned" is true, the
2193   //  "from" and "to" addresses are assumed to be heapword aligned.
2194   //
2195   // Arguments for generated stub:
2196   //      from:  O0
2197   //      to:    O1
2198   //      count: O2 treated as signed
2199   //
2200   address generate_conjoint_int_copy(bool aligned, address nooverlap_target,


2542                            Register temp,
2543                            Label& L_success) {
2544     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2545 
2546     BLOCK_COMMENT("type_check:");
2547 
2548     Label L_miss, L_pop_to_miss;
2549 
2550     assert_clean_int(super_check_offset, temp);
2551 
2552     __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2553                                      &L_success, &L_miss, NULL,
2554                                      super_check_offset);
2555 
2556     BLOCK_COMMENT("type_check_slow_path:");
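    // The slow path runs in a fresh register window: after save_frame(0) the
    // incoming registers must be read via after_save(), and both exits (the
    // ba to L_success and L_pop_to_miss) restore the window first.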
2557     __ save_frame(0);
2558     __ check_klass_subtype_slow_path(sub_klass->after_save(),
2559                                      super_klass->after_save(),
2560                                      L0, L1, L2, L4,
2561                                      NULL, &L_pop_to_miss);
2562     __ ba(L_success);
2563     __ delayed()->restore();
2564 
2565     __ bind(L_pop_to_miss);
2566     __ restore();
2567 
2568     // Fall through on failure!
2569     __ BIND(L_miss);
2570   }
2571 
2572 
2573   //  Generate stub for checked oop copy.
2574   //
2575   // Arguments for generated stub:
2576   //      from:  O0
2577   //      to:    O1
2578   //      count: O2 treated as signed
2579   //      ckoff: O3 (super_check_offset)
2580   //      ckval: O4 (super_klass)
2581   //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
2582   //


2639     __ delayed()->set(0, O0);           // return 0 on (trivial) success
2640 
2641     // ======== begin loop ========
2642     // (Loop is rotated; its entry is load_element.)
2643     // Loop variables:
2644     //   (O5 = 0; ; O5 += heapOopSize) --- offset from src, dest arrays
2645     //   (G1 = len; G1 != 0; G1--) --- number of oops *remaining*
2646     //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2647     __ align(OptoLoopAlignment);
2648 
2649     __ BIND(store_element);
2650     __ deccc(G1_remain);                // decrement the count
2651     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2652     __ inc(O5_offset, heapOopSize);     // step to next offset
2653     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2654     __ delayed()->set(0, O0);           // return 0 on success
2655 
2656     // ======== loop entry is here ========
2657     __ BIND(load_element);
2658     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
2659     __ br_null_short(G3_oop, Assembler::pt, store_element);
2660 
2661     __ load_klass(G3_oop, G4_klass); // query the object klass
2662 
2663     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2664                         // branch to this on success:
2665                         store_element);
2666     // ======== end loop ========
2667 
2668     // It was a real error; we must depend on the caller to finish the job.
2669     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2670     // Emit GC store barriers for the oops we have copied (O2 minus G1),
2671     // and report their number to the caller.
2672     __ BIND(fail);
2673     __ subcc(O2_count, G1_remain, O2_count);
2674     __ brx(Assembler::zero, false, Assembler::pt, done);
2675     __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
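    // Worked example of the (-1^K) encoding: if 10 oops were requested and 7
    // remain when the type check fails, K == 10 - 7 == 3 oops were copied and
    // the stub returns ~3 == -4; the caller recovers K as ~return_value.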
2676 
2677     __ BIND(do_card_marks);
2678     gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
2679 


2861     __ delayed()->tst(dst_pos);
2862     __ br(Assembler::negative, false, Assembler::pn, L_failed);
2863 
2864     //  if (length < 0) return -1;
2865     __ delayed()->tst(length);
2866     __ br(Assembler::negative, false, Assembler::pn, L_failed);
2867 
2868     BLOCK_COMMENT("arraycopy argument klass checks");
2869     //  get src->klass()
2870     if (UseCompressedOops) {
2871       __ delayed()->nop(); // ??? not good
2872       __ load_klass(src, G3_src_klass);
2873     } else {
2874       __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2875     }
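    // With compressed oops, load_klass expands to more than one instruction
    // and cannot sit in the preceding branch's delay slot; a nop fills the
    // slot instead and the klass is loaded right after it.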
2876 
2877 #ifdef ASSERT
2878     //  assert(src->klass() != NULL);
2879     BLOCK_COMMENT("assert klasses not null");
2880     { Label L_a, L_b;
2881       __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
2882       __ bind(L_a);
2883       __ stop("broken null klass");
2884       __ bind(L_b);
2885       __ load_klass(dst, G4_dst_klass);
2886       __ br_null(G4_dst_klass, false, Assembler::pn, L_a, false); // this would be broken also
2887       __ delayed()->mov(G0, G4_dst_klass);      // scribble the temp
2888       BLOCK_COMMENT("assert done");
2889     }
2890 #endif
2891 
2892     // Load layout helper
2893     //
2894     //  |array_tag|     | header_size | element_type |     |log2_element_size|
2895     // 32        30    24            16              8     2                 0
2896     //
2897     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2898     //
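    //   For example (a sketch; the header_size field depends on the build),
    //   a jshort[] typeArray decodes as array_tag == 0x3, element_type ==
    //   T_SHORT and log2_element_size == 1, while any objArray carries
    //   array_tag == 0x2.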
2899 
2900     int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2901                     Klass::layout_helper_offset_in_bytes();
2902 
2903     // Load the 32-bit signed value. Use the br() instruction with it to check icc.
2904     __ lduw(G3_src_klass, lh_offset, G5_lh);
2905 
2906     if (UseCompressedOops) {
2907       __ load_klass(dst, G4_dst_klass);
2908     }
2909     // Handle objArrays completely differently...
2910     juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2911     __ set(objArray_lh, O5_temp);
2912     __ cmp(G5_lh,       O5_temp);
2913     __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2914     if (UseCompressedOops) {
2915       __ delayed()->nop();
2916     } else {
2917       __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2918     }
2919 
2920     //  if (src->klass() != dst->klass()) return -1;
2921     __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
2922 
2923     //  if (!src->is_Array()) return -1;
2924     __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2925     __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2926 
2927     // At this point, it is known to be a typeArray (array_tag 0x3).
2928 #ifdef ASSERT
2929     __ delayed()->nop();
2930     { Label L;
2931       jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2932       __ set(lh_prim_tag_in_place, O5_temp);
2933       __ cmp(G5_lh,                O5_temp);
2934       __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2935       __ delayed()->nop();
2936       __ stop("must be a primitive array");
2937       __ bind(L);
2938     }
2939 #else
2940     __ delayed();                               // match next insn to prev branch
2941 #endif


2969     BLOCK_COMMENT("scale indexes to element size");
2970     __ sll_ptr(src_pos, G3_elsize, src_pos);
2971     __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2972     __ add(src, src_pos, from);       // src_addr
2973     __ add(dst, dst_pos, to);         // dst_addr
2974 
2975     BLOCK_COMMENT("choose copy loop based on element size");
2976     __ cmp(G3_elsize, 0);
2977     __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2978     __ delayed()->signx(length, count); // length
2979 
2980     __ cmp(G3_elsize, LogBytesPerShort);
2981     __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2982     __ delayed()->signx(length, count); // length
2983 
2984     __ cmp(G3_elsize, LogBytesPerInt);
2985     __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2986     __ delayed()->signx(length, count); // length
2987 #ifdef ASSERT
2988     { Label L;
2989       __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
2990       __ stop("must be long copy, but elsize is wrong");
2991       __ bind(L);
2992     }
2993 #endif
2994     __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2995     __ delayed()->signx(length, count); // length
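    // G3_elsize holds log2(element size) taken from the layout helper, so the
    // compares above select the byte (0), short (1) and int (2) loops and
    // anything left over must be an 8-byte element, as the ASSERT verifies.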
2996 
2997     // objArrayKlass
2998   __ BIND(L_objArray);
2999     // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3000 
3001     Label L_plain_copy, L_checkcast_copy;
3002     //  test array classes for subtyping
3003     __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
3004     __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3005     __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3006 
3007     // Identically typed arrays can be copied without element-wise checks.
3008     arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3009                            O5_temp, G5_lh, L_failed);

