133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null(t, false, Assembler::pt, L);
154 __ delayed()->nop();
155 __ stop("StubRoutines::call_stub: entered with pending exception");
156 __ bind(L);
157 }
158 #endif
159
160 // create activation frame & allocate space for parameters
161 { const Register t = G3_scratch;
162 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
163 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
164 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
165 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
166 __ neg(t); // negate so it can be used with save
167 __ save(SP, t, SP); // setup new frame
168 }
169
170 // +---------------+ <--- sp + 0
171 // | |
172 // . reg save area .
173 // | |
174 // +---------------+ <--- sp + 0x40
190 // . extra 7 slots .
191 // | |
192 // +---------------+ <--- fp + 0x5c
193 // | param. size |
194 // +---------------+ <--- fp + 0x60
195 // | thread |
196 // +---------------+
197 // | |
198
199 // pass parameters if any
200 BLOCK_COMMENT("pass parameters if any");
201 { const Register src = parameters.as_in().as_register();
202 const Register dst = Lentry_args;
203 const Register tmp = G3_scratch;
204 const Register cnt = G4_scratch;
205
206 // test if any parameters & setup of Lentry_args
207 Label exit;
208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
209 __ add( FP, STACK_BIAS, dst );
210 __ tst(cnt);
211 __ br(Assembler::zero, false, Assembler::pn, exit);
212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
213
214 // copy parameters if any
215 Label loop;
216 __ BIND(loop);
217 // Store parameter value
218 __ ld_ptr(src, 0, tmp);
219 __ add(src, BytesPerWord, src);
220 __ st_ptr(tmp, dst, 0);
221 __ deccc(cnt);
222 __ br(Assembler::greater, false, Assembler::pt, loop);
223 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
224
225 // done
226 __ BIND(exit);
227 }
228
229 // setup parameters, method & call Java function
230 #ifdef ASSERT
231 // layout_activation_impl checks its notion of saved SP against
265 // store result depending on type
266 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
267 // is treated as T_INT)
268 { const Register addr = result .as_in().as_register();
269 const Register type = result_type.as_in().as_register();
270 Label is_long, is_float, is_double, is_object, exit;
271 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
272 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
273 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
274 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
275 __ delayed()->nop();
276
277 // store int result
278 __ st(O0, addr, G0);
279
280 __ BIND(exit);
281 __ ret();
282 __ delayed()->restore();
283
284 __ BIND(is_object);
285 __ ba(false, exit);
286 __ delayed()->st_ptr(O0, addr, G0);
287
288 __ BIND(is_float);
289 __ ba(false, exit);
290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
291
292 __ BIND(is_double);
293 __ ba(false, exit);
294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
295
296 __ BIND(is_long);
297 #ifdef _LP64
298 __ ba(false, exit);
299 __ delayed()->st_long(O0, addr, G0); // store entire long
300 #else
301 #if defined(COMPILER2)
302 // All return values are where we want them, except for Longs. C2 returns
303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
304 // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
305 // build we simply always use G1.
306 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
307 // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
308 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
309
310 __ ba(false, exit);
311 __ delayed()->stx(G1, addr, G0); // store entire long
312 #else
313 __ st(O1, addr, BytesPerInt);
314 __ ba(false, exit);
315 __ delayed()->st(O0, addr, G0);
316 #endif /* COMPILER2 */
317 #endif /* _LP64 */
318 }
319 return start;
320 }
321
322
323 //----------------------------------------------------------------------------------------------------
324 // Return point for a Java call if there's an exception thrown in Java code.
325 // The exception is caught and transformed into a pending exception stored in
326 // JavaThread that can be tested from within the VM.
327 //
328 // Oexception: exception oop
329
330 address generate_catch_exception() {
331 StubCodeMark mark(this, "StubRoutines", "catch_exception");
332
333 address start = __ pc();
334 // verify that thread corresponds
365 //
366 // Contract with Java-level exception handler: O0 = exception
367 // O1 = throwing pc
368
369 address generate_forward_exception() { // forwards the pending exception to the Java-level handler for the return address in O7
370 StubCodeMark mark(this, "StubRoutines", "forward_exception");
371 address start = __ pc(); // stub entry point returned to the caller
372
373 // Upon entry, O7 has the return address returning into Java
374 // (interpreted or compiled) code; i.e. the return address
375 // becomes the throwing pc.
376
377 const Register& handler_reg = Gtemp; // will hold the computed exception handler address
378
379 Address exception_addr(G2_thread, Thread::pending_exception_offset()); // JavaThread::_pending_exception slot
380
381 #ifdef ASSERT
382 // make sure that this code is only executed if there is a pending exception
383 { Label L;
384 __ ld_ptr(exception_addr, Gtemp); // load pending exception oop for the null check
385 __ br_notnull(Gtemp, false, Assembler::pt, L);
386 __ delayed()->nop();
387 __ stop("StubRoutines::forward exception: no pending exception (1)");
388 __ bind(L);
389 }
390 #endif
391
392 // compute exception handler into handler_reg
393 __ get_thread(); // (re)establish G2_thread as the current thread before the runtime call
394 __ ld_ptr(exception_addr, Oexception); // Oexception := pending exception oop
395 __ verify_oop(Oexception);
396 __ save_frame(0); // compensates for compiler weakness
397 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
398 BLOCK_COMMENT("call exception_handler_for_return_address");
399 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch); // handler pc comes back in O0
400 __ mov(O0, handler_reg); // stash handler address; survives the restore below
401 __ restore(); // compensates for compiler weakness
402
403 __ ld_ptr(exception_addr, Oexception); // reload exception oop (still pending; cleared only below)
404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
405
406 #ifdef ASSERT
407 // make sure exception is set
408 { Label L;
409 __ br_notnull(Oexception, false, Assembler::pt, L);
410 __ delayed()->nop();
411 __ stop("StubRoutines::forward exception: no pending exception (2)");
412 __ bind(L);
413 }
414 #endif
415 // jump to exception handler
416 __ jmp(handler_reg, 0);
417 // clear pending exception
418 __ delayed()->st_ptr(G0, exception_addr); // store of G0 (null) happens in the branch delay slot
419
420 return start;
421 }
422
423
424 //------------------------------------------------------------------------------------------------------------------------
425 // Continuation point for throwing of implicit exceptions that are not handled in
426 // the current activation. Fabricates an exception oop and initiates normal
427 // exception dispatching in this frame. Only callee-saved registers are preserved
428 // (through the normal register window / RegisterMap handling).
429 // If the compiler needs all registers to be preserved between the fault
430 // point and the exception handler then it must assume responsibility for that in
484 if (arg2 != noreg) {
485 __ mov(arg2, O2);
486 }
487 // do the call
488 BLOCK_COMMENT("call runtime_entry");
489 __ call(runtime_entry, relocInfo::runtime_call_type);
490 if (!VerifyThread)
491 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
492 else
493 __ delayed()->nop(); // (thread already passed)
494 __ restore_thread(noreg);
495 __ reset_last_Java_frame();
496
497 // check for pending exceptions. use Gtemp as scratch register.
498 #ifdef ASSERT
499 Label L;
500
501 Address exception_addr(G2_thread, Thread::pending_exception_offset());
502 Register scratch_reg = Gtemp;
503 __ ld_ptr(exception_addr, scratch_reg);
504 __ br_notnull(scratch_reg, false, Assembler::pt, L);
505 __ delayed()->nop();
506 __ should_not_reach_here();
507 __ bind(L);
508 #endif // ASSERT
509 BLOCK_COMMENT("call forward_exception_entry");
510 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
511 // we use O7 linkage so that forward_exception_entry has the issuing PC
512 __ delayed()->restore();
513
514 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
515 return stub->entry_point();
516 }
517
518 #undef __
519 #define __ _masm->
520
521
522 // Generate a routine that sets all the registers so we
523 // can tell if the stop routine prints them correctly.
524 address generate_test_stop() {
525 StubCodeMark mark(this, "StubRoutines", "test_stop");
597 if (mark_oop_reg == noreg) {
598 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
599 __ set((intptr_t)lock_ptr, lock_ptr_reg);
600 } else {
601 assert(scratch_reg != noreg, "just checking");
602 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
603 __ set((intptr_t)lock_ptr, lock_ptr_reg);
604 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
605 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
606 }
607 }
608
609 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
610
611 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
612 __ set(StubRoutines::Sparc::locked, lock_reg);
613 // Initialize yield counter
614 __ mov(G0,yield_reg);
615
616 __ BIND(retry);
617 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
618 __ br(Assembler::less, false, Assembler::pt, dontyield);
619 __ delayed()->nop();
620
621 // This code can only be called from inside the VM, this
622 // stub is only invoked from Atomic::add(). We do not
623 // want to use call_VM, because _last_java_sp and such
624 // must already be set.
625 //
626 // Save the regs and make space for a C call
627 __ save(SP, -96, SP);
628 __ save_all_globals_into_locals();
629 BLOCK_COMMENT("call os::naked_sleep");
630 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
631 __ delayed()->nop();
632 __ restore_globals_from_locals();
633 __ restore();
634 // reset the counter
635 __ mov(G0,yield_reg);
636
637 __ BIND(dontyield);
638
639 // try to get lock
659 // dest: O1
660 //
661 // Results:
662 //
663 // O0: the value previously stored in dest
664 //
665 address generate_atomic_xchg() {
666 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
667 address start = __ pc();
668
669 if (UseCASForSwap) {
670 // Use CAS instead of swap, just in case the MP hardware
671 // prefers to work with just one kind of synch. instruction.
672 Label retry;
673 __ BIND(retry);
674 __ mov(O0, O3); // scratch copy of exchange value
675 __ ld(O1, 0, O2); // observe the previous value
676 // try to replace O2 with O3
677 __ cas_under_lock(O1, O2, O3,
678 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
679 __ cmp(O2, O3);
680 __ br(Assembler::notEqual, false, Assembler::pn, retry);
681 __ delayed()->nop();
682
683 __ retl(false);
684 __ delayed()->mov(O2, O0); // report previous value to caller
685
686 } else {
687 if (VM_Version::v9_instructions_work()) {
688 __ retl(false);
689 __ delayed()->swap(O1, 0, O0);
690 } else {
691 const Register& lock_reg = O2;
692 const Register& lock_ptr_reg = O3;
693 const Register& yield_reg = O4;
694
695 Label retry;
696 Label dontyield;
697
698 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
699 // got the lock, do the swap
700 __ swap(O1, 0, O0);
701
783 //
784 // Results:
785 //
786 // O0: the new value stored in dest
787 //
788 // Overwrites (v9): O3
789 // Overwrites (v8): O3,O4,O5
790 //
791 address generate_atomic_add() {
792 StubCodeMark mark(this, "StubRoutines", "atomic_add");
793 address start = __ pc();
794 __ BIND(_atomic_add_stub);
795
796 if (VM_Version::v9_instructions_work()) {
797 Label(retry);
798 __ BIND(retry);
799
800 __ lduw(O1, 0, O2);
801 __ add(O0, O2, O3);
802 __ cas(O1, O2, O3);
803 __ cmp( O2, O3);
804 __ br(Assembler::notEqual, false, Assembler::pn, retry);
805 __ delayed()->nop();
806 __ retl(false);
807 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
808 } else {
809 const Register& lock_reg = O2;
810 const Register& lock_ptr_reg = O3;
811 const Register& value_reg = O4;
812 const Register& yield_reg = O5;
813
814 Label(retry);
815 Label(dontyield);
816
817 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
818 // got lock, do the increment
819 __ ld(O1, 0, value_reg);
820 __ add(O0, value_reg, value_reg);
821 __ st(value_reg, O1, 0);
822
823 // %%% only for RMO and PSO
824 __ membar(Assembler::StoreStore);
825
1353 if (!aligned)
1354 #endif
1355 {
1356 // Copy with shift 16 bytes per iteration if arrays do not have
1357 // the same alignment mod 8, otherwise fall through to the next
1358 // code for aligned copy.
1359 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1360 // Also jump over aligned copy after the copy with shift completed.
1361
1362 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1363 }
1364
1365 // Both array are 8 bytes aligned, copy 16 bytes at a time
1366 __ and3(count, 7, G4); // Save count
1367 __ srl(count, 3, count);
1368 generate_disjoint_long_copy_core(aligned);
1369 __ mov(G4, count); // Restore count
1370
1371 // copy tailing bytes
1372 __ BIND(L_copy_byte);
1373 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1374 __ delayed()->nop();
1375 __ align(OptoLoopAlignment);
1376 __ BIND(L_copy_byte_loop);
1377 __ ldub(from, offset, O3);
1378 __ deccc(count);
1379 __ stb(O3, to, offset);
1380 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1381 __ delayed()->inc(offset);
1382
1383 __ BIND(L_exit);
1384 // O3, O4 are used as temp registers
1385 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1386 __ retl();
1387 __ delayed()->mov(G0, O0); // return 0
1388 return start;
1389 }
1390
1391 //
1392 // Generate stub for conjoint byte copy. If "aligned" is true, the
1393 // "from" and "to" addresses are assumed to be heapword aligned.
1394 //
1465 // Also jump over aligned copy after the copy with shift completed.
1466
1467 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1468 L_aligned_copy, L_copy_byte);
1469 }
1470 // copy 4 elements (16 bytes) at a time
1471 __ align(OptoLoopAlignment);
1472 __ BIND(L_aligned_copy);
1473 __ dec(end_from, 16);
1474 __ ldx(end_from, 8, O3);
1475 __ ldx(end_from, 0, O4);
1476 __ dec(end_to, 16);
1477 __ deccc(count, 16);
1478 __ stx(O3, end_to, 8);
1479 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1480 __ delayed()->stx(O4, end_to, 0);
1481 __ inc(count, 16);
1482
1483 // copy 1 element (2 bytes) at a time
1484 __ BIND(L_copy_byte);
1485 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1486 __ delayed()->nop();
1487 __ align(OptoLoopAlignment);
1488 __ BIND(L_copy_byte_loop);
1489 __ dec(end_from);
1490 __ dec(end_to);
1491 __ ldub(end_from, 0, O4);
1492 __ deccc(count);
1493 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1494 __ delayed()->stb(O4, end_to, 0);
1495
1496 __ BIND(L_exit);
1497 // O3, O4 are used as temp registers
1498 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1499 __ retl();
1500 __ delayed()->mov(G0, O0); // return 0
1501 return start;
1502 }
1503
1504 //
1505 // Generate stub for disjoint short copy. If "aligned" is true, the
1506 // "from" and "to" addresses are assumed to be heapword aligned.
1583 if (!aligned)
1584 #endif
1585 {
1586 // Copy with shift 16 bytes per iteration if arrays do not have
1587 // the same alignment mod 8, otherwise fall through to the next
1588 // code for aligned copy.
1589 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1590 // Also jump over aligned copy after the copy with shift completed.
1591
1592 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1593 }
1594
1595 // Both array are 8 bytes aligned, copy 16 bytes at a time
1596 __ and3(count, 3, G4); // Save
1597 __ srl(count, 2, count);
1598 generate_disjoint_long_copy_core(aligned);
1599 __ mov(G4, count); // restore
1600
1601 // copy 1 element at a time
1602 __ BIND(L_copy_2_bytes);
1603 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1604 __ delayed()->nop();
1605 __ align(OptoLoopAlignment);
1606 __ BIND(L_copy_2_bytes_loop);
1607 __ lduh(from, offset, O3);
1608 __ deccc(count);
1609 __ sth(O3, to, offset);
1610 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1611 __ delayed()->inc(offset, 2);
1612
1613 __ BIND(L_exit);
1614 // O3, O4 are used as temp registers
1615 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1616 __ retl();
1617 __ delayed()->mov(G0, O0); // return 0
1618 return start;
1619 }
1620
1621 //
1622 // Generate stub for disjoint short fill. If "aligned" is true, the
1623 // "to" address is assumed to be heapword aligned.
1624 //
1929 // Also jump over aligned copy after the copy with shift completed.
1930
1931 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1932 L_aligned_copy, L_copy_2_bytes);
1933 }
1934 // copy 4 elements (16 bytes) at a time
1935 __ align(OptoLoopAlignment);
1936 __ BIND(L_aligned_copy);
1937 __ dec(end_from, 16);
1938 __ ldx(end_from, 8, O3);
1939 __ ldx(end_from, 0, O4);
1940 __ dec(end_to, 16);
1941 __ deccc(count, 8);
1942 __ stx(O3, end_to, 8);
1943 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1944 __ delayed()->stx(O4, end_to, 0);
1945 __ inc(count, 8);
1946
1947 // copy 1 element (2 bytes) at a time
1948 __ BIND(L_copy_2_bytes);
1949 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1950 __ delayed()->nop();
1951 __ BIND(L_copy_2_bytes_loop);
1952 __ dec(end_from, 2);
1953 __ dec(end_to, 2);
1954 __ lduh(end_from, 0, O4);
1955 __ deccc(count);
1956 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1957 __ delayed()->sth(O4, end_to, 0);
1958
1959 __ BIND(L_exit);
1960 // O3, O4 are used as temp registers
1961 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1962 __ retl();
1963 __ delayed()->mov(G0, O0); // return 0
1964 return start;
1965 }
1966
1967 //
1968 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1969 // If "aligned" is true, the "from" and "to" addresses are assumed
1970 // to be heapword aligned.
2043 __ sllx(O4, 32, O4);
2044 __ srlx(G4, 32, G3);
2045 __ bset(G3, O4);
2046 __ stx(O4, to, -8);
2047 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2048 __ delayed()->mov(G4, O3);
2049
2050 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2051 __ delayed()->inc(count, 4); // restore 'count'
2052
2053 __ BIND(L_aligned_copy);
2054 }
2055 // copy 4 elements (16 bytes) at a time
2056 __ and3(count, 1, G4); // Save
2057 __ srl(count, 1, count);
2058 generate_disjoint_long_copy_core(aligned);
2059 __ mov(G4, count); // Restore
2060
2061 // copy 1 element at a time
2062 __ BIND(L_copy_4_bytes);
2063 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2064 __ delayed()->nop();
2065 __ BIND(L_copy_4_bytes_loop);
2066 __ ld(from, offset, O3);
2067 __ deccc(count);
2068 __ st(O3, to, offset);
2069 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2070 __ delayed()->inc(offset, 4);
2071 __ BIND(L_exit);
2072 }
2073
2074 //
2075 // Generate stub for disjoint int copy. If "aligned" is true, the
2076 // "from" and "to" addresses are assumed to be heapword aligned.
2077 //
2078 // Arguments for generated stub:
2079 // from: O0
2080 // to: O1
2081 // count: O2 treated as signed
2082 //
2083 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2084 __ align(CodeEntryAlignment);
2176 __ delayed()->mov(O5, O3);
2177
2178 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2179 __ delayed()->inc(count, 4);
2180
2181 // copy 4 elements (16 bytes) at a time
2182 __ align(OptoLoopAlignment);
2183 __ BIND(L_aligned_copy);
2184 __ dec(end_from, 16);
2185 __ ldx(end_from, 8, O3);
2186 __ ldx(end_from, 0, O4);
2187 __ dec(end_to, 16);
2188 __ deccc(count, 4);
2189 __ stx(O3, end_to, 8);
2190 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2191 __ delayed()->stx(O4, end_to, 0);
2192 __ inc(count, 4);
2193
2194 // copy 1 element (4 bytes) at a time
2195 __ BIND(L_copy_4_bytes);
2196 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2197 __ delayed()->nop();
2198 __ BIND(L_copy_4_bytes_loop);
2199 __ dec(end_from, 4);
2200 __ dec(end_to, 4);
2201 __ ld(end_from, 0, O4);
2202 __ deccc(count);
2203 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2204 __ delayed()->st(O4, end_to, 0);
2205 __ BIND(L_exit);
2206 }
2207
2208 //
2209 // Generate stub for conjoint int copy. If "aligned" is true, the
2210 // "from" and "to" addresses are assumed to be heapword aligned.
2211 //
2212 // Arguments for generated stub:
2213 // from: O0
2214 // to: O1
2215 // count: O2 treated as signed
2216 //
2217 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2559 Register temp,
2560 Label& L_success) {
2561 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2562
2563 BLOCK_COMMENT("type_check:");
2564
2565 Label L_miss, L_pop_to_miss;
2566
2567 assert_clean_int(super_check_offset, temp);
2568
2569 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2570 &L_success, &L_miss, NULL,
2571 super_check_offset);
2572
2573 BLOCK_COMMENT("type_check_slow_path:");
2574 __ save_frame(0);
2575 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2576 super_klass->after_save(),
2577 L0, L1, L2, L4,
2578 NULL, &L_pop_to_miss);
2579 __ ba(false, L_success);
2580 __ delayed()->restore();
2581
2582 __ bind(L_pop_to_miss);
2583 __ restore();
2584
2585 // Fall through on failure!
2586 __ BIND(L_miss);
2587 }
2588
2589
2590 // Generate stub for checked oop copy.
2591 //
2592 // Arguments for generated stub:
2593 // from: O0
2594 // to: O1
2595 // count: O2 treated as signed
2596 // ckoff: O3 (super_check_offset)
2597 // ckval: O4 (super_klass)
2598 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2599 //
2656 __ delayed()->set(0, O0); // return 0 on (trivial) success
2657
2658 // ======== begin loop ========
2659 // (Loop is rotated; its entry is load_element.)
2660 // Loop variables:
2661 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2662 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2663 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2664 __ align(OptoLoopAlignment);
2665
2666 __ BIND(store_element);
2667 __ deccc(G1_remain); // decrement the count
2668 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2669 __ inc(O5_offset, heapOopSize); // step to next offset
2670 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2671 __ delayed()->set(0, O0); // return 0 on success (contract: O0 == 0 means all elements copied)
2672
2673 // ======== loop entry is here ========
2674 __ BIND(load_element);
2675 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2676 __ br_null(G3_oop, true, Assembler::pt, store_element);
2677 __ delayed()->nop();
2678
2679 __ load_klass(G3_oop, G4_klass); // query the object klass
2680
2681 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2682 // branch to this on success:
2683 store_element);
2684 // ======== end loop ========
2685
2686 // It was a real error; we must depend on the caller to finish the job.
2687 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2688 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2689 // and report their number to the caller.
2690 __ BIND(fail);
2691 __ subcc(O2_count, G1_remain, O2_count);
2692 __ brx(Assembler::zero, false, Assembler::pt, done);
2693 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2694
2695 __ BIND(do_card_marks);
2696 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2697
2879 __ delayed()->tst(dst_pos);
2880 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2881
2882 // if (length < 0) return -1;
2883 __ delayed()->tst(length);
2884 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2885
2886 BLOCK_COMMENT("arraycopy argument klass checks");
2887 // get src->klass()
2888 if (UseCompressedOops) {
2889 __ delayed()->nop(); // ??? not good
2890 __ load_klass(src, G3_src_klass);
2891 } else {
2892 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2893 }
2894
2895 #ifdef ASSERT
2896 // assert(src->klass() != NULL);
2897 BLOCK_COMMENT("assert klasses not null");
2898 { Label L_a, L_b;
2899 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2900 __ delayed()->nop();
2901 __ bind(L_a);
2902 __ stop("broken null klass");
2903 __ bind(L_b);
2904 __ load_klass(dst, G4_dst_klass);
2905 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2906 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2907 BLOCK_COMMENT("assert done");
2908 }
2909 #endif
2910
2911 // Load layout helper
2912 //
2913 // |array_tag| | header_size | element_type | |log2_element_size|
2914 // 32 30 24 16 8 2 0
2915 //
2916 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2917 //
2918
2919 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2920 Klass::layout_helper_offset_in_bytes();
2921
2922 // Load 32-bits signed value. Use br() instruction with it to check icc.
2923 __ lduw(G3_src_klass, lh_offset, G5_lh);
2924
2925 if (UseCompressedOops) {
2926 __ load_klass(dst, G4_dst_klass);
2927 }
2928 // Handle objArrays completely differently...
2929 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2930 __ set(objArray_lh, O5_temp);
2931 __ cmp(G5_lh, O5_temp);
2932 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2933 if (UseCompressedOops) {
2934 __ delayed()->nop();
2935 } else {
2936 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2937 }
2938
2939 // if (src->klass() != dst->klass()) return -1;
2940 __ cmp(G3_src_klass, G4_dst_klass);
2941 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
2942 __ delayed()->nop();
2943
2944 // if (!src->is_Array()) return -1;
2945 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2946 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2947
2948 // At this point, it is known to be a typeArray (array_tag 0x3).
2949 #ifdef ASSERT
2950 __ delayed()->nop();
2951 { Label L;
2952 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2953 __ set(lh_prim_tag_in_place, O5_temp);
2954 __ cmp(G5_lh, O5_temp);
2955 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2956 __ delayed()->nop();
2957 __ stop("must be a primitive array");
2958 __ bind(L);
2959 }
2960 #else
2961 __ delayed(); // match next insn to prev branch
2962 #endif
2990 BLOCK_COMMENT("scale indexes to element size");
2991 __ sll_ptr(src_pos, G3_elsize, src_pos);
2992 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2993 __ add(src, src_pos, from); // src_addr
2994 __ add(dst, dst_pos, to); // dst_addr
2995
2996 BLOCK_COMMENT("choose copy loop based on element size");
2997 __ cmp(G3_elsize, 0);
2998 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2999 __ delayed()->signx(length, count); // length
3000
3001 __ cmp(G3_elsize, LogBytesPerShort);
3002 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
3003 __ delayed()->signx(length, count); // length
3004
3005 __ cmp(G3_elsize, LogBytesPerInt);
3006 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
3007 __ delayed()->signx(length, count); // length
3008 #ifdef ASSERT
3009 { Label L;
3010 __ cmp(G3_elsize, LogBytesPerLong);
3011 __ br(Assembler::equal, false, Assembler::pt, L);
3012 __ delayed()->nop();
3013 __ stop("must be long copy, but elsize is wrong");
3014 __ bind(L);
3015 }
3016 #endif
3017 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
3018 __ delayed()->signx(length, count); // length
3019
3020 // objArrayKlass
3021 __ BIND(L_objArray);
3022 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3023
3024 Label L_plain_copy, L_checkcast_copy;
3025 // test array classes for subtyping
3026 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3027 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3028 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3029
3030 // Identically typed arrays can be copied without element-wise checks.
3031 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3032 O5_temp, G5_lh, L_failed);
|
133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null_short(t, Assembler::pt, L);
154 __ stop("StubRoutines::call_stub: entered with pending exception");
155 __ bind(L);
156 }
157 #endif
158
159 // create activation frame & allocate space for parameters
160 { const Register t = G3_scratch;
161 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
162 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
163 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
164 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
165 __ neg(t); // negate so it can be used with save
166 __ save(SP, t, SP); // setup new frame
167 }
168
169 // +---------------+ <--- sp + 0
170 // | |
171 // . reg save area .
172 // | |
173 // +---------------+ <--- sp + 0x40
189 // . extra 7 slots .
190 // | |
191 // +---------------+ <--- fp + 0x5c
192 // | param. size |
193 // +---------------+ <--- fp + 0x60
194 // | thread |
195 // +---------------+
196 // | |
197
198 // pass parameters if any
199 BLOCK_COMMENT("pass parameters if any");
200 { const Register src = parameters.as_in().as_register();
201 const Register dst = Lentry_args;
202 const Register tmp = G3_scratch;
203 const Register cnt = G4_scratch;
204
205 // test if any parameters & setup of Lentry_args
206 Label exit;
207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
208 __ add( FP, STACK_BIAS, dst );
209 __ cmp_zero_and_br(Assembler::zero, cnt, exit);
210 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
211
212 // copy parameters if any
213 Label loop;
214 __ BIND(loop);
215 // Store parameter value
216 __ ld_ptr(src, 0, tmp);
217 __ add(src, BytesPerWord, src);
218 __ st_ptr(tmp, dst, 0);
219 __ deccc(cnt);
220 __ br(Assembler::greater, false, Assembler::pt, loop);
221 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
222
223 // done
224 __ BIND(exit);
225 }
226
227 // setup parameters, method & call Java function
228 #ifdef ASSERT
229 // layout_activation_impl checks it's notion of saved SP against
263 // store result depending on type
264 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
265 // is treated as T_INT)
266 { const Register addr = result .as_in().as_register();
267 const Register type = result_type.as_in().as_register();
268 Label is_long, is_float, is_double, is_object, exit;
269 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
270 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
271 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
272 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
273 __ delayed()->nop();
274
275 // store int result
276 __ st(O0, addr, G0);
277
278 __ BIND(exit);
279 __ ret();
280 __ delayed()->restore();
281
282 __ BIND(is_object);
283 __ ba(exit);
284 __ delayed()->st_ptr(O0, addr, G0);
285
286 __ BIND(is_float);
287 __ ba(exit);
288 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
289
290 __ BIND(is_double);
291 __ ba(exit);
292 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
293
294 __ BIND(is_long);
295 #ifdef _LP64
296 __ ba(exit);
297 __ delayed()->st_long(O0, addr, G0); // store entire long
298 #else
299 #if defined(COMPILER2)
300 // All return values are where we want them, except for Longs. C2 returns
301 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
302 // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
303 // build we simply always use G1.
304 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
305 // do this here. Unfortunately if we did a rethrow we'd see an machepilog node
306 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
307
308 __ ba(exit);
309 __ delayed()->stx(G1, addr, G0); // store entire long
310 #else
311 __ st(O1, addr, BytesPerInt);
312 __ ba(exit);
313 __ delayed()->st(O0, addr, G0);
314 #endif /* COMPILER2 */
315 #endif /* _LP64 */
316 }
317 return start;
318 }
319
320
321 //----------------------------------------------------------------------------------------------------
322 // Return point for a Java call if there's an exception thrown in Java code.
323 // The exception is caught and transformed into a pending exception stored in
324 // JavaThread that can be tested from within the VM.
325 //
326 // Oexception: exception oop
327
328 address generate_catch_exception() {
329 StubCodeMark mark(this, "StubRoutines", "catch_exception");
330
331 address start = __ pc();
332 // verify that thread corresponds
363 //
364 // Contract with Java-level exception handler: O0 = exception
365 // O1 = throwing pc
366
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.
    //
    // On exit we jump to the Java-level exception handler with
    // Oexception (= O0) holding the exception oop and Oissuing_pc
    // (= O1) holding the throwing pc, and the thread's pending
    // exception field cleared (see contract comment above).

    // Gtemp serves double duty: scratch for the ASSERT check below,
    // then holds the computed handler address for the final jump.
    const Register& handler_reg = Gtemp;

    // Address of JavaThread::_pending_exception for the current thread.
    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull_short(Gtemp, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    // Open a fresh register window so the leaf call cannot clobber the
    // caller's out registers (O7 in particular).
    __ save_frame(0);             // compensates for compiler weakness
    // O7->after_save() is the caller's O7 seen from the new window (I7);
    // adjust by pc_return_offset to turn the return address into the
    // throwing pc expected by the handler lookup.
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
    __ mov(O0, handler_reg);      // handler address is the leaf call's result
    __ restore();                 // compensates for compiler weakness

    // Reload the exception oop (the leaf call may have clobbered O-regs)
    // and recompute the throwing pc into the register the handler reads.
    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull_short(Oexception, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    // (done in the branch delay slot, so it executes before the handler)
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }
418
419
420 //------------------------------------------------------------------------------------------------------------------------
421 // Continuation point for throwing of implicit exceptions that are not handled in
422 // the current activation. Fabricates an exception oop and initiates normal
423 // exception dispatching in this frame. Only callee-saved registers are preserved
424 // (through the normal register window / RegisterMap handling).
425 // If the compiler needs all registers to be preserved between the fault
426 // point and the exception handler then it must assume responsibility for that in
480 if (arg2 != noreg) {
481 __ mov(arg2, O2);
482 }
483 // do the call
484 BLOCK_COMMENT("call runtime_entry");
485 __ call(runtime_entry, relocInfo::runtime_call_type);
486 if (!VerifyThread)
487 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
488 else
489 __ delayed()->nop(); // (thread already passed)
490 __ restore_thread(noreg);
491 __ reset_last_Java_frame();
492
493 // check for pending exceptions. use Gtemp as scratch register.
494 #ifdef ASSERT
495 Label L;
496
497 Address exception_addr(G2_thread, Thread::pending_exception_offset());
498 Register scratch_reg = Gtemp;
499 __ ld_ptr(exception_addr, scratch_reg);
500 __ br_notnull_short(scratch_reg, Assembler::pt, L);
501 __ should_not_reach_here();
502 __ bind(L);
503 #endif // ASSERT
504 BLOCK_COMMENT("call forward_exception_entry");
505 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
506 // we use O7 linkage so that forward_exception_entry has the issuing PC
507 __ delayed()->restore();
508
509 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
510 return stub->entry_point();
511 }
512
513 #undef __
514 #define __ _masm->
515
516
517 // Generate a routine that sets all the registers so we
518 // can tell if the stop routine prints them correctly.
519 address generate_test_stop() {
520 StubCodeMark mark(this, "StubRoutines", "test_stop");
592 if (mark_oop_reg == noreg) {
593 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
594 __ set((intptr_t)lock_ptr, lock_ptr_reg);
595 } else {
596 assert(scratch_reg != noreg, "just checking");
597 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
598 __ set((intptr_t)lock_ptr, lock_ptr_reg);
599 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
600 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
601 }
602 }
603
604 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
605
606 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
607 __ set(StubRoutines::Sparc::locked, lock_reg);
608 // Initialize yield counter
609 __ mov(G0,yield_reg);
610
611 __ BIND(retry);
612 __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
613
614 // This code can only be called from inside the VM, this
615 // stub is only invoked from Atomic::add(). We do not
616 // want to use call_VM, because _last_java_sp and such
617 // must already be set.
618 //
619 // Save the regs and make space for a C call
620 __ save(SP, -96, SP);
621 __ save_all_globals_into_locals();
622 BLOCK_COMMENT("call os::naked_sleep");
623 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
624 __ delayed()->nop();
625 __ restore_globals_from_locals();
626 __ restore();
627 // reset the counter
628 __ mov(G0,yield_reg);
629
630 __ BIND(dontyield);
631
632 // try to get lock
652 // dest: O1
653 //
654 // Results:
655 //
656 // O0: the value previously stored in dest
657 //
658 address generate_atomic_xchg() {
659 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
660 address start = __ pc();
661
662 if (UseCASForSwap) {
663 // Use CAS instead of swap, just in case the MP hardware
664 // prefers to work with just one kind of synch. instruction.
665 Label retry;
666 __ BIND(retry);
667 __ mov(O0, O3); // scratch copy of exchange value
668 __ ld(O1, 0, O2); // observe the previous value
669 // try to replace O2 with O3
670 __ cas_under_lock(O1, O2, O3,
671 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
672 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
673
674 __ retl(false);
675 __ delayed()->mov(O2, O0); // report previous value to caller
676
677 } else {
678 if (VM_Version::v9_instructions_work()) {
679 __ retl(false);
680 __ delayed()->swap(O1, 0, O0);
681 } else {
682 const Register& lock_reg = O2;
683 const Register& lock_ptr_reg = O3;
684 const Register& yield_reg = O4;
685
686 Label retry;
687 Label dontyield;
688
689 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
690 // got the lock, do the swap
691 __ swap(O1, 0, O0);
692
774 //
775 // Results:
776 //
777 // O0: the new value stored in dest
778 //
779 // Overwrites (v9): O3
780 // Overwrites (v8): O3,O4,O5
781 //
782 address generate_atomic_add() {
783 StubCodeMark mark(this, "StubRoutines", "atomic_add");
784 address start = __ pc();
785 __ BIND(_atomic_add_stub);
786
787 if (VM_Version::v9_instructions_work()) {
788 Label(retry);
789 __ BIND(retry);
790
791 __ lduw(O1, 0, O2);
792 __ add(O0, O2, O3);
793 __ cas(O1, O2, O3);
794 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
795 __ retl(false);
796 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
797 } else {
798 const Register& lock_reg = O2;
799 const Register& lock_ptr_reg = O3;
800 const Register& value_reg = O4;
801 const Register& yield_reg = O5;
802
803 Label(retry);
804 Label(dontyield);
805
806 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
807 // got lock, do the increment
808 __ ld(O1, 0, value_reg);
809 __ add(O0, value_reg, value_reg);
810 __ st(value_reg, O1, 0);
811
812 // %%% only for RMO and PSO
813 __ membar(Assembler::StoreStore);
814
1342 if (!aligned)
1343 #endif
1344 {
1345 // Copy with shift 16 bytes per iteration if arrays do not have
1346 // the same alignment mod 8, otherwise fall through to the next
1347 // code for aligned copy.
1348 // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
1349 // Also jump over aligned copy after the copy with shift completed.
1350
1351 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1352 }
1353
1354 // Both array are 8 bytes aligned, copy 16 bytes at a time
1355 __ and3(count, 7, G4); // Save count
1356 __ srl(count, 3, count);
1357 generate_disjoint_long_copy_core(aligned);
1358 __ mov(G4, count); // Restore count
1359
1360 // copy tailing bytes
1361 __ BIND(L_copy_byte);
1362 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1363 __ align(OptoLoopAlignment);
1364 __ BIND(L_copy_byte_loop);
1365 __ ldub(from, offset, O3);
1366 __ deccc(count);
1367 __ stb(O3, to, offset);
1368 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1369 __ delayed()->inc(offset);
1370
1371 __ BIND(L_exit);
1372 // O3, O4 are used as temp registers
1373 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1374 __ retl();
1375 __ delayed()->mov(G0, O0); // return 0
1376 return start;
1377 }
1378
1379 //
1380 // Generate stub for conjoint byte copy. If "aligned" is true, the
1381 // "from" and "to" addresses are assumed to be heapword aligned.
1382 //
1453 // Also jump over aligned copy after the copy with shift completed.
1454
1455 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1456 L_aligned_copy, L_copy_byte);
1457 }
1458 // copy 4 elements (16 bytes) at a time
1459 __ align(OptoLoopAlignment);
1460 __ BIND(L_aligned_copy);
1461 __ dec(end_from, 16);
1462 __ ldx(end_from, 8, O3);
1463 __ ldx(end_from, 0, O4);
1464 __ dec(end_to, 16);
1465 __ deccc(count, 16);
1466 __ stx(O3, end_to, 8);
1467 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1468 __ delayed()->stx(O4, end_to, 0);
1469 __ inc(count, 16);
1470
1471 // copy 1 element (2 bytes) at a time
1472 __ BIND(L_copy_byte);
1473 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1474 __ align(OptoLoopAlignment);
1475 __ BIND(L_copy_byte_loop);
1476 __ dec(end_from);
1477 __ dec(end_to);
1478 __ ldub(end_from, 0, O4);
1479 __ deccc(count);
1480 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1481 __ delayed()->stb(O4, end_to, 0);
1482
1483 __ BIND(L_exit);
1484 // O3, O4 are used as temp registers
1485 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1486 __ retl();
1487 __ delayed()->mov(G0, O0); // return 0
1488 return start;
1489 }
1490
1491 //
1492 // Generate stub for disjoint short copy. If "aligned" is true, the
1493 // "from" and "to" addresses are assumed to be heapword aligned.
1570 if (!aligned)
1571 #endif
1572 {
1573 // Copy with shift 16 bytes per iteration if arrays do not have
1574 // the same alignment mod 8, otherwise fall through to the next
1575 // code for aligned copy.
1576 // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
1577 // Also jump over aligned copy after the copy with shift completed.
1578
1579 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1580 }
1581
1582 // Both array are 8 bytes aligned, copy 16 bytes at a time
1583 __ and3(count, 3, G4); // Save
1584 __ srl(count, 2, count);
1585 generate_disjoint_long_copy_core(aligned);
1586 __ mov(G4, count); // restore
1587
1588 // copy 1 element at a time
1589 __ BIND(L_copy_2_bytes);
1590 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1591 __ align(OptoLoopAlignment);
1592 __ BIND(L_copy_2_bytes_loop);
1593 __ lduh(from, offset, O3);
1594 __ deccc(count);
1595 __ sth(O3, to, offset);
1596 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1597 __ delayed()->inc(offset, 2);
1598
1599 __ BIND(L_exit);
1600 // O3, O4 are used as temp registers
1601 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1602 __ retl();
1603 __ delayed()->mov(G0, O0); // return 0
1604 return start;
1605 }
1606
1607 //
1608 // Generate stub for disjoint short fill. If "aligned" is true, the
1609 // "to" address is assumed to be heapword aligned.
1610 //
1915 // Also jump over aligned copy after the copy with shift completed.
1916
1917 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1918 L_aligned_copy, L_copy_2_bytes);
1919 }
1920 // copy 4 elements (16 bytes) at a time
1921 __ align(OptoLoopAlignment);
1922 __ BIND(L_aligned_copy);
1923 __ dec(end_from, 16);
1924 __ ldx(end_from, 8, O3);
1925 __ ldx(end_from, 0, O4);
1926 __ dec(end_to, 16);
1927 __ deccc(count, 8);
1928 __ stx(O3, end_to, 8);
1929 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1930 __ delayed()->stx(O4, end_to, 0);
1931 __ inc(count, 8);
1932
1933 // copy 1 element (2 bytes) at a time
1934 __ BIND(L_copy_2_bytes);
1935 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1936 __ BIND(L_copy_2_bytes_loop);
1937 __ dec(end_from, 2);
1938 __ dec(end_to, 2);
1939 __ lduh(end_from, 0, O4);
1940 __ deccc(count);
1941 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1942 __ delayed()->sth(O4, end_to, 0);
1943
1944 __ BIND(L_exit);
1945 // O3, O4 are used as temp registers
1946 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1947 __ retl();
1948 __ delayed()->mov(G0, O0); // return 0
1949 return start;
1950 }
1951
1952 //
1953 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1954 // If "aligned" is true, the "from" and "to" addresses are assumed
1955 // to be heapword aligned.
2028 __ sllx(O4, 32, O4);
2029 __ srlx(G4, 32, G3);
2030 __ bset(G3, O4);
2031 __ stx(O4, to, -8);
2032 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2033 __ delayed()->mov(G4, O3);
2034
2035 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2036 __ delayed()->inc(count, 4); // restore 'count'
2037
2038 __ BIND(L_aligned_copy);
2039 }
2040 // copy 4 elements (16 bytes) at a time
2041 __ and3(count, 1, G4); // Save
2042 __ srl(count, 1, count);
2043 generate_disjoint_long_copy_core(aligned);
2044 __ mov(G4, count); // Restore
2045
2046 // copy 1 element at a time
2047 __ BIND(L_copy_4_bytes);
2048 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2049 __ BIND(L_copy_4_bytes_loop);
2050 __ ld(from, offset, O3);
2051 __ deccc(count);
2052 __ st(O3, to, offset);
2053 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2054 __ delayed()->inc(offset, 4);
2055 __ BIND(L_exit);
2056 }
2057
2058 //
2059 // Generate stub for disjoint int copy. If "aligned" is true, the
2060 // "from" and "to" addresses are assumed to be heapword aligned.
2061 //
2062 // Arguments for generated stub:
2063 // from: O0
2064 // to: O1
2065 // count: O2 treated as signed
2066 //
2067 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2068 __ align(CodeEntryAlignment);
2160 __ delayed()->mov(O5, O3);
2161
2162 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2163 __ delayed()->inc(count, 4);
2164
2165 // copy 4 elements (16 bytes) at a time
2166 __ align(OptoLoopAlignment);
2167 __ BIND(L_aligned_copy);
2168 __ dec(end_from, 16);
2169 __ ldx(end_from, 8, O3);
2170 __ ldx(end_from, 0, O4);
2171 __ dec(end_to, 16);
2172 __ deccc(count, 4);
2173 __ stx(O3, end_to, 8);
2174 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2175 __ delayed()->stx(O4, end_to, 0);
2176 __ inc(count, 4);
2177
2178 // copy 1 element (4 bytes) at a time
2179 __ BIND(L_copy_4_bytes);
2180 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2181 __ BIND(L_copy_4_bytes_loop);
2182 __ dec(end_from, 4);
2183 __ dec(end_to, 4);
2184 __ ld(end_from, 0, O4);
2185 __ deccc(count);
2186 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2187 __ delayed()->st(O4, end_to, 0);
2188 __ BIND(L_exit);
2189 }
2190
2191 //
2192 // Generate stub for conjoint int copy. If "aligned" is true, the
2193 // "from" and "to" addresses are assumed to be heapword aligned.
2194 //
2195 // Arguments for generated stub:
2196 // from: O0
2197 // to: O1
2198 // count: O2 treated as signed
2199 //
2200 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2542 Register temp,
2543 Label& L_success) {
2544 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2545
2546 BLOCK_COMMENT("type_check:");
2547
2548 Label L_miss, L_pop_to_miss;
2549
2550 assert_clean_int(super_check_offset, temp);
2551
2552 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2553 &L_success, &L_miss, NULL,
2554 super_check_offset);
2555
2556 BLOCK_COMMENT("type_check_slow_path:");
2557 __ save_frame(0);
2558 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2559 super_klass->after_save(),
2560 L0, L1, L2, L4,
2561 NULL, &L_pop_to_miss);
2562 __ ba(L_success);
2563 __ delayed()->restore();
2564
2565 __ bind(L_pop_to_miss);
2566 __ restore();
2567
2568 // Fall through on failure!
2569 __ BIND(L_miss);
2570 }
2571
2572
2573 // Generate stub for checked oop copy.
2574 //
2575 // Arguments for generated stub:
2576 // from: O0
2577 // to: O1
2578 // count: O2 treated as signed
2579 // ckoff: O3 (super_check_offset)
2580 // ckval: O4 (super_klass)
2581 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2582 //
2639 __ delayed()->set(0, O0); // return 0 on (trivial) success
2640
2641 // ======== begin loop ========
2642 // (Loop is rotated; its entry is load_element.)
2643 // Loop variables:
2644 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2645 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2646 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2647 __ align(OptoLoopAlignment);
2648
2649 __ BIND(store_element);
2650 __ deccc(G1_remain); // decrement the count
2651 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2652 __ inc(O5_offset, heapOopSize); // step to next offset
2653 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2654 __ delayed()->set(0, O0); // return -1 on success
2655
2656 // ======== loop entry is here ========
2657 __ BIND(load_element);
2658 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2659 __ br_null_short(G3_oop, Assembler::pt, store_element);
2660
2661 __ load_klass(G3_oop, G4_klass); // query the object klass
2662
2663 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2664 // branch to this on success:
2665 store_element);
2666 // ======== end loop ========
2667
2668 // It was a real error; we must depend on the caller to finish the job.
2669 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2670 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2671 // and report their number to the caller.
2672 __ BIND(fail);
2673 __ subcc(O2_count, G1_remain, O2_count);
2674 __ brx(Assembler::zero, false, Assembler::pt, done);
2675 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2676
2677 __ BIND(do_card_marks);
2678 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2679
2861 __ delayed()->tst(dst_pos);
2862 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2863
2864 // if (length < 0) return -1;
2865 __ delayed()->tst(length);
2866 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2867
2868 BLOCK_COMMENT("arraycopy argument klass checks");
2869 // get src->klass()
2870 if (UseCompressedOops) {
2871 __ delayed()->nop(); // ??? not good
2872 __ load_klass(src, G3_src_klass);
2873 } else {
2874 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2875 }
2876
2877 #ifdef ASSERT
2878 // assert(src->klass() != NULL);
2879 BLOCK_COMMENT("assert klasses not null");
2880 { Label L_a, L_b;
2881 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
2882 __ bind(L_a);
2883 __ stop("broken null klass");
2884 __ bind(L_b);
2885 __ load_klass(dst, G4_dst_klass);
2886 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2887 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2888 BLOCK_COMMENT("assert done");
2889 }
2890 #endif
2891
2892 // Load layout helper
2893 //
2894 // |array_tag| | header_size | element_type | |log2_element_size|
2895 // 32 30 24 16 8 2 0
2896 //
2897 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2898 //
2899
2900 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2901 Klass::layout_helper_offset_in_bytes();
2902
2903 // Load 32-bits signed value. Use br() instruction with it to check icc.
2904 __ lduw(G3_src_klass, lh_offset, G5_lh);
2905
2906 if (UseCompressedOops) {
2907 __ load_klass(dst, G4_dst_klass);
2908 }
2909 // Handle objArrays completely differently...
2910 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2911 __ set(objArray_lh, O5_temp);
2912 __ cmp(G5_lh, O5_temp);
2913 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2914 if (UseCompressedOops) {
2915 __ delayed()->nop();
2916 } else {
2917 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2918 }
2919
2920 // if (src->klass() != dst->klass()) return -1;
2921 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
2922
2923 // if (!src->is_Array()) return -1;
2924 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2925 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2926
2927 // At this point, it is known to be a typeArray (array_tag 0x3).
2928 #ifdef ASSERT
2929 __ delayed()->nop();
2930 { Label L;
2931 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2932 __ set(lh_prim_tag_in_place, O5_temp);
2933 __ cmp(G5_lh, O5_temp);
2934 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2935 __ delayed()->nop();
2936 __ stop("must be a primitive array");
2937 __ bind(L);
2938 }
2939 #else
2940 __ delayed(); // match next insn to prev branch
2941 #endif
2969 BLOCK_COMMENT("scale indexes to element size");
2970 __ sll_ptr(src_pos, G3_elsize, src_pos);
2971 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2972 __ add(src, src_pos, from); // src_addr
2973 __ add(dst, dst_pos, to); // dst_addr
2974
2975 BLOCK_COMMENT("choose copy loop based on element size");
2976 __ cmp(G3_elsize, 0);
2977 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2978 __ delayed()->signx(length, count); // length
2979
2980 __ cmp(G3_elsize, LogBytesPerShort);
2981 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2982 __ delayed()->signx(length, count); // length
2983
2984 __ cmp(G3_elsize, LogBytesPerInt);
2985 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2986 __ delayed()->signx(length, count); // length
2987 #ifdef ASSERT
2988 { Label L;
2989 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
2990 __ stop("must be long copy, but elsize is wrong");
2991 __ bind(L);
2992 }
2993 #endif
2994 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2995 __ delayed()->signx(length, count); // length
2996
2997 // objArrayKlass
2998 __ BIND(L_objArray);
2999 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3000
3001 Label L_plain_copy, L_checkcast_copy;
3002 // test array classes for subtyping
3003 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3004 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3005 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3006
3007 // Identically typed arrays can be copied without element-wise checks.
3008 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3009 O5_temp, G5_lh, L_failed);
|