133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null(t, false, Assembler::pt, L);
154 __ stop("StubRoutines::call_stub: entered with pending exception");
155 __ bind(L);
156 }
157 #endif
158
159 // create activation frame & allocate space for parameters
160 { const Register t = G3_scratch;
161 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
162 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
163 __ round_to(t, WordsPerLong); // make sure it is a multiple of 2 (in words)
164 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
165 __ neg(t); // negate so it can be used with save
166 __ save(SP, t, SP); // setup new frame
167 }
168
169 // +---------------+ <--- sp + 0
170 // | |
171 // . reg save area .
172 // | |
173 // +---------------+ <--- sp + 0x40
189 // . extra 7 slots .
190 // | |
191 // +---------------+ <--- fp + 0x5c
192 // | param. size |
193 // +---------------+ <--- fp + 0x60
194 // | thread |
195 // +---------------+
196 // | |
197
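
For reference, here is the frame-size arithmetic above as a minimal C++ sketch. The save-area size and the slot shift are illustrative assumptions standing in for frame::memory_parameter_word_sp_offset and Interpreter::logStackElementSize; this is not VM code.

    #include <cstdint>

    // Hedged sketch: the byte amount fed to `save SP, t, SP` above.
    int64_t call_stub_frame_bytes(int64_t parameter_words) {
        const int64_t save_area_words   = 16;  // assumed stand-in for frame::memory_parameter_word_sp_offset
        const int64_t log_stack_element = 3;   // assumed stand-in for Interpreter::logStackElementSize (8-byte slots)
        int64_t words = parameter_words + save_area_words;   // add space for save area
        words = (words + 1) & ~int64_t(1);                   // round_to(t, WordsPerLong): even word count
        return -(words << log_stack_element);                // negated so it can be used with save
    }
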
198 // pass parameters if any
199 BLOCK_COMMENT("pass parameters if any");
200 { const Register src = parameters.as_in().as_register();
201 const Register dst = Lentry_args;
202 const Register tmp = G3_scratch;
203 const Register cnt = G4_scratch;
204
205 // test if any parameters & set up Lentry_args
206 Label exit;
207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
208 __ add( FP, STACK_BIAS, dst );
209 __ tst(cnt);
210 __ br(Assembler::zero, false, Assembler::pn, exit);
211 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
212
213 // copy parameters if any
214 Label loop;
215 __ BIND(loop);
216 // Store parameter value
217 __ ld_ptr(src, 0, tmp);
218 __ add(src, BytesPerWord, src);
219 __ st_ptr(tmp, dst, 0);
220 __ deccc(cnt);
221 __ br(Assembler::greater, false, Assembler::pt, loop);
222 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
223
224 // done
225 __ BIND(exit);
226 }
227
228 // setup parameters, method & call Java function
229 #ifdef ASSERT
230 // layout_activation_impl checks its notion of saved SP against
264 // store result depending on type
265 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
266 // is treated as T_INT)
267 { const Register addr = result .as_in().as_register();
268 const Register type = result_type.as_in().as_register();
269 Label is_long, is_float, is_double, is_object, exit;
270 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
271 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
272 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
273 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
274 __ delayed()->nop();
275
276 // store int result
277 __ st(O0, addr, G0);
278
279 __ BIND(exit);
280 __ ret();
281 __ delayed()->restore();
282
283 __ BIND(is_object);
284 __ ba(exit, false);
285 __ delayed()->st_ptr(O0, addr, G0);
286
287 __ BIND(is_float);
288 __ ba(exit, false);
289 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
290
291 __ BIND(is_double);
292 __ ba(exit, false);
293 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
294
295 __ BIND(is_long);
296 #ifdef _LP64
297 __ ba(exit, false);
298 __ delayed()->st_long(O0, addr, G0); // store entire long
299 #else
300 #if defined(COMPILER2)
301 // All return values are where we want them, except for Longs. C2 returns
302 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
303 // Since the interpreter will return longs in both G1 and O0/O1 in the 32-bit
304 // build, we simply always use G1.
305 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
306 // do this here. Unfortunately if we did a rethrow we'd see a MachEpilogNode
307 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
308
309 __ ba(exit, false);
310 __ delayed()->stx(G1, addr, G0); // store entire long
311 #else
312 __ st(O1, addr, BytesPerInt);
313 __ ba(exit, false);
314 __ delayed()->st(O0, addr, G0);
315 #endif /* COMPILER2 */
316 #endif /* _LP64 */
317 }
318 return start;
319 }
320
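
The branch ladder in the result-store block is just a switch on result_type. As a hedged C++ sketch (the separate result parameters stand in for the O0/O1/F0 registers; everything outside the four special cases is stored as an int, as the comment above says):

    #include <cstdint>
    #include <cstring>

    enum BasicTypeSketch { T_INT_S, T_OBJECT_S, T_LONG_S, T_FLOAT_S, T_DOUBLE_S };

    // Hedged sketch of call_stub's result store; not VM code.
    void store_result(void* addr, BasicTypeSketch type, int32_t int_result,
                      int64_t long_result, float float_result,
                      double double_result, void* oop_result) {
        switch (type) {
        case T_OBJECT_S: std::memcpy(addr, &oop_result,    sizeof oop_result);    break;
        case T_LONG_S:   std::memcpy(addr, &long_result,   sizeof long_result);   break;
        case T_FLOAT_S:  std::memcpy(addr, &float_result,  sizeof float_result);  break;
        case T_DOUBLE_S: std::memcpy(addr, &double_result, sizeof double_result); break;
        default:         std::memcpy(addr, &int_result,    sizeof int_result);    break;
        }
    }
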
321
322 //----------------------------------------------------------------------------------------------------
323 // Return point for a Java call if there's an exception thrown in Java code.
324 // The exception is caught and transformed into a pending exception stored in
325 // JavaThread that can be tested from within the VM.
326 //
327 // Oexception: exception oop
328
329 address generate_catch_exception() {
330 StubCodeMark mark(this, "StubRoutines", "catch_exception");
331
332 address start = __ pc();
333 // verify that thread corresponds
364 //
365 // Contract with Java-level exception handler: O0 = exception
366 // O1 = throwing pc
367
368 address generate_forward_exception() {
369 StubCodeMark mark(this, "StubRoutines", "forward_exception");
370 address start = __ pc();
371
372 // Upon entry, O7 has the return address returning into Java
373 // (interpreted or compiled) code; i.e. the return address
374 // becomes the throwing pc.
375
376 const Register& handler_reg = Gtemp;
377
378 Address exception_addr(G2_thread, Thread::pending_exception_offset());
379
380 #ifdef ASSERT
381 // make sure that this code is only executed if there is a pending exception
382 { Label L;
383 __ ld_ptr(exception_addr, Gtemp);
384 __ br_notnull(Gtemp, false, Assembler::pt, L);
385 __ stop("StubRoutines::forward exception: no pending exception (1)");
386 __ bind(L);
387 }
388 #endif
389
390 // compute exception handler into handler_reg
391 __ get_thread();
392 __ ld_ptr(exception_addr, Oexception);
393 __ verify_oop(Oexception);
394 __ save_frame(0); // compensates for compiler weakness
395 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
396 BLOCK_COMMENT("call exception_handler_for_return_address");
397 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
398 __ mov(O0, handler_reg);
399 __ restore(); // compensates for compiler weakness
400
401 __ ld_ptr(exception_addr, Oexception);
402 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
403
404 #ifdef ASSERT
405 // make sure exception is set
406 { Label L;
407 __ br_notnull(Oexception, false, Assembler::pt, L);
408 __ stop("StubRoutines::forward exception: no pending exception (2)");
409 __ bind(L);
410 }
411 #endif
412 // jump to exception handler
413 __ jmp(handler_reg, 0);
414 // clear pending exception
415 __ delayed()->st_ptr(G0, exception_addr);
416
417 return start;
418 }
419
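
Summarizing the stub's control flow as a hedged C++ outline (struct and function names here are illustrative stand-ins, not the real VM interfaces):

    // Hedged outline of forward_exception; names are illustrative only.
    struct ThreadSketch { void* pending_exception; };

    // Assumed stand-in for SharedRuntime::exception_handler_for_return_address.
    void* exception_handler_for_pc(ThreadSketch*, void* /*pc*/) { return nullptr; }

    struct HandlerDispatch { void* handler; void* exception; void* issuing_pc; };

    HandlerDispatch forward_exception(ThreadSketch* thread, void* return_address) {
        HandlerDispatch d;
        d.issuing_pc = return_address;                    // O7 becomes the throwing pc
        d.handler    = exception_handler_for_pc(thread, d.issuing_pc);
        d.exception  = thread->pending_exception;         // reloaded into Oexception
        thread->pending_exception = nullptr;              // cleared in the jmp delay slot
        return d;                                         // jump to handler, O0/O1 per the contract
    }
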
420
421 //------------------------------------------------------------------------------------------------------------------------
422 // Continuation point for throwing of implicit exceptions that are not handled in
423 // the current activation. Fabricates an exception oop and initiates normal
424 // exception dispatching in this frame. Only callee-saved registers are preserved
425 // (through the normal register window / RegisterMap handling).
426 // If the compiler needs all registers to be preserved between the fault
427 // point and the exception handler then it must assume responsibility for that in
481 if (arg2 != noreg) {
482 __ mov(arg2, O2);
483 }
484 // do the call
485 BLOCK_COMMENT("call runtime_entry");
486 __ call(runtime_entry, relocInfo::runtime_call_type);
487 if (!VerifyThread)
488 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
489 else
490 __ delayed()->nop(); // (thread already passed)
491 __ restore_thread(noreg);
492 __ reset_last_Java_frame();
493
494 // check for pending exceptions. use Gtemp as scratch register.
495 #ifdef ASSERT
496 Label L;
497
498 Address exception_addr(G2_thread, Thread::pending_exception_offset());
499 Register scratch_reg = Gtemp;
500 __ ld_ptr(exception_addr, scratch_reg);
501 __ br_notnull(scratch_reg, false, Assembler::pt, L);
502 __ should_not_reach_here();
503 __ bind(L);
504 #endif // ASSERT
505 BLOCK_COMMENT("call forward_exception_entry");
506 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
507 // we use O7 linkage so that forward_exception_entry has the issuing PC
508 __ delayed()->restore();
509
510 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
511 return stub->entry_point();
512 }
513
514 #undef __
515 #define __ _masm->
516
517
518 // Generate a routine that sets all the registers so we
519 // can tell if the stop routine prints them correctly.
520 address generate_test_stop() {
521 StubCodeMark mark(this, "StubRoutines", "test_stop");
593 if (mark_oop_reg == noreg) {
594 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
595 __ set((intptr_t)lock_ptr, lock_ptr_reg);
596 } else {
597 assert(scratch_reg != noreg, "just checking");
598 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
599 __ set((intptr_t)lock_ptr, lock_ptr_reg);
600 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
601 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
602 }
603 }
604
605 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
606
607 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
608 __ set(StubRoutines::Sparc::locked, lock_reg);
609 // Initialize yield counter
610 __ mov(G0,yield_reg);
611
612 __ BIND(retry);
613 __ cmp_and_br(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, false, Assembler::pt, dontyield);
614
615 // This code can only be called from inside the VM; this
616 // stub is only invoked from Atomic::add(). We do not
617 // want to use call_VM, because _last_java_sp and such
618 // must already be set.
619 //
620 // Save the regs and make space for a C call
621 __ save(SP, -96, SP);
622 __ save_all_globals_into_locals();
623 BLOCK_COMMENT("call os::naked_sleep");
624 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
625 __ delayed()->nop();
626 __ restore_globals_from_locals();
627 __ restore();
628 // reset the counter
629 __ mov(G0,yield_reg);
630
631 __ BIND(dontyield);
632
633 // try to get lock
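
The prologue spins up to the configured count and then parks in os::naked_sleep before retrying. A self-contained C++ analogue of that spin-then-yield acquire, as a behavioral sketch only (std::atomic and std::this_thread::yield stand in for the lock word and the naked sleep):

    #include <atomic>
    #include <thread>

    std::atomic<int> lock_word{0};  // 0 = unlocked, 1 = locked (cf. StubRoutines::Sparc::locked)

    // spin_count stands in for V8AtomicOperationUnderLockSpinCount.
    void spin_then_yield_acquire(int spin_count) {
        int yields = 0;                                   // mov(G0, yield_reg)
        for (;;) {
            int expected = 0;
            if (lock_word.compare_exchange_strong(expected, 1))
                return;                                   // got the lock
            if (++yields >= spin_count) {
                std::this_thread::yield();                // stands in for os::naked_sleep
                yields = 0;                               // reset the counter, as the stub does
            }
        }
    }
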
653 // dest: O1
654 //
655 // Results:
656 //
657 // O0: the value previously stored in dest
658 //
659 address generate_atomic_xchg() {
660 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
661 address start = __ pc();
662
663 if (UseCASForSwap) {
664 // Use CAS instead of swap, just in case the MP hardware
665 // prefers to work with just one kind of synch. instruction.
666 Label retry;
667 __ BIND(retry);
668 __ mov(O0, O3); // scratch copy of exchange value
669 __ ld(O1, 0, O2); // observe the previous value
670 // try to replace O2 with O3
671 __ cas_under_lock(O1, O2, O3,
672 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
673 __ cmp_and_br(O2, O3, Assembler::notEqual, false, Assembler::pn, retry);
674
675 __ retl(false);
676 __ delayed()->mov(O2, O0); // report previous value to caller
677
678 } else {
679 if (VM_Version::v9_instructions_work()) {
680 __ retl(false);
681 __ delayed()->swap(O1, 0, O0);
682 } else {
683 const Register& lock_reg = O2;
684 const Register& lock_ptr_reg = O3;
685 const Register& yield_reg = O4;
686
687 Label retry;
688 Label dontyield;
689
690 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
691 // got the lock, do the swap
692 __ swap(O1, 0, O0);
693
775 //
776 // Results:
777 //
778 // O0: the new value stored in dest
779 //
780 // Overwrites (v9): O3
781 // Overwrites (v8): O3,O4,O5
782 //
783 address generate_atomic_add() {
784 StubCodeMark mark(this, "StubRoutines", "atomic_add");
785 address start = __ pc();
786 __ BIND(_atomic_add_stub);
787
788 if (VM_Version::v9_instructions_work()) {
789 Label retry;
790 __ BIND(retry);
791
792 __ lduw(O1, 0, O2);
793 __ add(O0, O2, O3);
794 __ cas(O1, O2, O3);
795 __ cmp_and_br(O2, O3, Assembler::notEqual, false, Assembler::pn, retry);
796 __ retl(false);
797 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
798 } else {
799 const Register& lock_reg = O2;
800 const Register& lock_ptr_reg = O3;
801 const Register& value_reg = O4;
802 const Register& yield_reg = O5;
803
804 Label retry;
805 Label dontyield;
806
807 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
808 // got lock, do the increment
809 __ ld(O1, 0, value_reg);
810 __ add(O0, value_reg, value_reg);
811 __ st(value_reg, O1, 0);
812
813 // %%% only for RMO and PSO
814 __ membar(Assembler::StoreStore);
815
1343 if (!aligned)
1344 #endif
1345 {
1346 // Copy with shift 16 bytes per iteration if arrays do not have
1347 // the same alignment mod 8, otherwise fall through to the next
1348 // code for aligned copy.
1349 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1350 // Also jump over aligned copy after the copy with shift completed.
1351
1352 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1353 }
1354
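
copy_16_bytes_forward_with_shift loads aligned doublewords and shifts adjacent pairs together when src and dst disagree mod 8. One merge step of that idea as a scalar C++ sketch (SPARC is big-endian, so the earlier word supplies the high bits; off is the source misalignment, with 0 < off < 8):

    #include <cstdint>

    // Hedged sketch: build one aligned 8-byte store from two aligned loads.
    uint64_t funnel_shift_merge(uint64_t prev_word, uint64_t next_word, unsigned off) {
        unsigned lshift = 8 * off;                  // bits to discard from the first word
        return (prev_word << lshift) | (next_word >> (64 - lshift));
    }
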
1355 // Both arrays are 8-byte aligned, copy 16 bytes at a time
1356 __ and3(count, 7, G4); // Save count
1357 __ srl(count, 3, count);
1358 generate_disjoint_long_copy_core(aligned);
1359 __ mov(G4, count); // Restore count
1360
1361 // copy trailing bytes
1362 __ BIND(L_copy_byte);
1363 __ br_zero(count, L_exit);
1364 __ align(OptoLoopAlignment);
1365 __ BIND(L_copy_byte_loop);
1366 __ ldub(from, offset, O3);
1367 __ deccc(count);
1368 __ stb(O3, to, offset);
1369 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1370 __ delayed()->inc(offset);
1371
1372 __ BIND(L_exit);
1373 // O3, O4 are used as temp registers
1374 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1375 __ retl();
1376 __ delayed()->mov(G0, O0); // return 0
1377 return start;
1378 }
1379
1380 //
1381 // Generate stub for conjoint byte copy. If "aligned" is true, the
1382 // "from" and "to" addresses are assumed to be heapword aligned.
1383 //
1454 // Also jump over aligned copy after the copy with shift completed.
1455
1456 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1457 L_aligned_copy, L_copy_byte);
1458 }
1459 // copy 16 elements (16 bytes) at a time
1460 __ align(OptoLoopAlignment);
1461 __ BIND(L_aligned_copy);
1462 __ dec(end_from, 16);
1463 __ ldx(end_from, 8, O3);
1464 __ ldx(end_from, 0, O4);
1465 __ dec(end_to, 16);
1466 __ deccc(count, 16);
1467 __ stx(O3, end_to, 8);
1468 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1469 __ delayed()->stx(O4, end_to, 0);
1470 __ inc(count, 16);
1471
1472 // copy 1 element (1 byte) at a time
1473 __ BIND(L_copy_byte);
1474 __ br_zero(count, L_exit);
1475 __ align(OptoLoopAlignment);
1476 __ BIND(L_copy_byte_loop);
1477 __ dec(end_from);
1478 __ dec(end_to);
1479 __ ldub(end_from, 0, O4);
1480 __ deccc(count);
1481 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1482 __ delayed()->stb(O4, end_to, 0);
1483
1484 __ BIND(L_exit);
1485 // O3, O4 are used as temp registers
1486 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1487 __ retl();
1488 __ delayed()->mov(G0, O0); // return 0
1489 return start;
1490 }
1491
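
Since this is the conjoint variant, the loops above run from the high addresses downward, which is what keeps an overlapping to > from copy correct; disjoint callers get routed to the forward stub. The tail loop's C++ analogue, as a sketch (the real stub takes the 16-byte and shifted fast paths first):

    #include <cstddef>
    #include <cstdint>

    // Hedged sketch of the backward byte tail loop above.
    void conjoint_byte_copy_tail(const uint8_t* from, uint8_t* to, size_t count) {
        const uint8_t* end_from = from + count;
        uint8_t*       end_to   = to + count;
        while (count-- > 0) {
            *--end_to = *--end_from;                // dec pointers, then ldub/stb
        }
    }
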
1492 //
1493 // Generate stub for disjoint short copy. If "aligned" is true, the
1494 // "from" and "to" addresses are assumed to be heapword aligned.
1571 if (!aligned)
1572 #endif
1573 {
1574 // Copy with shift 16 bytes per iteration if arrays do not have
1575 // the same alignment mod 8, otherwise fall through to the next
1576 // code for aligned copy.
1577 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1578 // Also jump over aligned copy after the copy with shift completed.
1579
1580 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1581 }
1582
1583 // Both arrays are 8-byte aligned, copy 16 bytes at a time
1584 __ and3(count, 3, G4); // Save
1585 __ srl(count, 2, count);
1586 generate_disjoint_long_copy_core(aligned);
1587 __ mov(G4, count); // restore
1588
1589 // copy 1 element at a time
1590 __ BIND(L_copy_2_bytes);
1591 __ br_zero(count, L_exit);
1592 __ align(OptoLoopAlignment);
1593 __ BIND(L_copy_2_bytes_loop);
1594 __ lduh(from, offset, O3);
1595 __ deccc(count);
1596 __ sth(O3, to, offset);
1597 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1598 __ delayed()->inc(offset, 2);
1599
1600 __ BIND(L_exit);
1601 // O3, O4 are used as temp registers
1602 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1603 __ retl();
1604 __ delayed()->mov(G0, O0); // return 0
1605 return start;
1606 }
1607
1608 //
1609 // Generate stub for disjoint short fill. If "aligned" is true, the
1610 // "to" address is assumed to be heapword aligned.
1611 //
1916 // Also jump over aligned copy after the copy with shift completed.
1917
1918 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1919 L_aligned_copy, L_copy_2_bytes);
1920 }
1921 // copy 8 elements (16 bytes) at a time
1922 __ align(OptoLoopAlignment);
1923 __ BIND(L_aligned_copy);
1924 __ dec(end_from, 16);
1925 __ ldx(end_from, 8, O3);
1926 __ ldx(end_from, 0, O4);
1927 __ dec(end_to, 16);
1928 __ deccc(count, 8);
1929 __ stx(O3, end_to, 8);
1930 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1931 __ delayed()->stx(O4, end_to, 0);
1932 __ inc(count, 8);
1933
1934 // copy 1 element (2 bytes) at a time
1935 __ BIND(L_copy_2_bytes);
1936 __ br_zero(count, L_exit);
1937 __ BIND(L_copy_2_bytes_loop);
1938 __ dec(end_from, 2);
1939 __ dec(end_to, 2);
1940 __ lduh(end_from, 0, O4);
1941 __ deccc(count);
1942 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1943 __ delayed()->sth(O4, end_to, 0);
1944
1945 __ BIND(L_exit);
1946 // O3, O4 are used as temp registers
1947 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1948 __ retl();
1949 __ delayed()->mov(G0, O0); // return 0
1950 return start;
1951 }
1952
1953 //
1954 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1955 // If "aligned" is true, the "from" and "to" addresses are assumed
1956 // to be heapword aligned.
2029 __ sllx(O4, 32, O4);
2030 __ srlx(G4, 32, G3);
2031 __ bset(G3, O4);
2032 __ stx(O4, to, -8);
2033 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2034 __ delayed()->mov(G4, O3);
2035
2036 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2037 __ delayed()->inc(count, 4); // restore 'count'
2038
2039 __ BIND(L_aligned_copy);
2040 }
2041 // copy 4 elements (16 bytes) at a time
2042 __ and3(count, 1, G4); // Save
2043 __ srl(count, 1, count);
2044 generate_disjoint_long_copy_core(aligned);
2045 __ mov(G4, count); // Restore
2046
2047 // copy 1 element at a time
2048 __ BIND(L_copy_4_bytes);
2049 __ br_zero(count, L_exit);
2050 __ BIND(L_copy_4_bytes_loop);
2051 __ ld(from, offset, O3);
2052 __ deccc(count);
2053 __ st(O3, to, offset);
2054 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2055 __ delayed()->inc(offset, 4);
2056 __ BIND(L_exit);
2057 }
2058
2059 //
2060 // Generate stub for disjoint int copy. If "aligned" is true, the
2061 // "from" and "to" addresses are assumed to be heapword aligned.
2062 //
2063 // Arguments for generated stub:
2064 // from: O0
2065 // to: O1
2066 // count: O2 treated as signed
2067 //
2068 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2069 __ align(CodeEntryAlignment);
2161 __ delayed()->mov(O5, O3);
2162
2163 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2164 __ delayed()->inc(count, 4);
2165
2166 // copy 4 elements (16 bytes) at a time
2167 __ align(OptoLoopAlignment);
2168 __ BIND(L_aligned_copy);
2169 __ dec(end_from, 16);
2170 __ ldx(end_from, 8, O3);
2171 __ ldx(end_from, 0, O4);
2172 __ dec(end_to, 16);
2173 __ deccc(count, 4);
2174 __ stx(O3, end_to, 8);
2175 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2176 __ delayed()->stx(O4, end_to, 0);
2177 __ inc(count, 4);
2178
2179 // copy 1 element (4 bytes) at a time
2180 __ BIND(L_copy_4_bytes);
2181 __ br_zero(count, L_exit);
2182 __ BIND(L_copy_4_bytes_loop);
2183 __ dec(end_from, 4);
2184 __ dec(end_to, 4);
2185 __ ld(end_from, 0, O4);
2186 __ deccc(count);
2187 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2188 __ delayed()->st(O4, end_to, 0);
2189 __ BIND(L_exit);
2190 }
2191
2192 //
2193 // Generate stub for conjoint int copy. If "aligned" is true, the
2194 // "from" and "to" addresses are assumed to be heapword aligned.
2195 //
2196 // Arguments for generated stub:
2197 // from: O0
2198 // to: O1
2199 // count: O2 treated as signed
2200 //
2201 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2543 Register temp,
2544 Label& L_success) {
2545 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2546
2547 BLOCK_COMMENT("type_check:");
2548
2549 Label L_miss, L_pop_to_miss;
2550
2551 assert_clean_int(super_check_offset, temp);
2552
2553 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2554 &L_success, &L_miss, NULL,
2555 super_check_offset);
2556
2557 BLOCK_COMMENT("type_check_slow_path:");
2558 __ save_frame(0);
2559 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2560 super_klass->after_save(),
2561 L0, L1, L2, L4,
2562 NULL, &L_pop_to_miss);
2563 __ ba(L_success, false);
2564 __ delayed()->restore();
2565
2566 __ bind(L_pop_to_miss);
2567 __ restore();
2568
2569 // Fall through on failure!
2570 __ BIND(L_miss);
2571 }
2572
2573
2574 // Generate stub for checked oop copy.
2575 //
2576 // Arguments for generated stub:
2577 // from: O0
2578 // to: O1
2579 // count: O2 treated as signed
2580 // ckoff: O3 (super_check_offset)
2581 // ckval: O4 (super_klass)
2582 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2583 //
2640 __ delayed()->set(0, O0); // return 0 on (trivial) success
2641
2642 // ======== begin loop ========
2643 // (Loop is rotated; its entry is load_element.)
2644 // Loop variables:
2645 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2646 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2647 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2648 __ align(OptoLoopAlignment);
2649
2650 __ BIND(store_element);
2651 __ deccc(G1_remain); // decrement the count
2652 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2653 __ inc(O5_offset, heapOopSize); // step to next offset
2654 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2655 __ delayed()->set(0, O0); // return 0 on success
2656
2657 // ======== loop entry is here ========
2658 __ BIND(load_element);
2659 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2660 __ br_null(G3_oop, true, Assembler::pt, store_element);
2661
2662 __ load_klass(G3_oop, G4_klass); // query the object klass
2663
2664 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2665 // branch to this on success:
2666 store_element);
2667 // ======== end loop ========
2668
2669 // It was a real error; we must depend on the caller to finish the job.
2670 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2671 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2672 // and report their number to the caller.
2673 __ BIND(fail);
2674 __ subcc(O2_count, G1_remain, O2_count);
2675 __ brx(Assembler::zero, false, Assembler::pt, done);
2676 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2677
2678 __ BIND(do_card_marks);
2679 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2680
2862 __ delayed()->tst(dst_pos);
2863 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2864
2865 // if (length < 0) return -1;
2866 __ delayed()->tst(length);
2867 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2868
2869 BLOCK_COMMENT("arraycopy argument klass checks");
2870 // get src->klass()
2871 if (UseCompressedOops) {
2872 __ delayed()->nop(); // ??? not good
2873 __ load_klass(src, G3_src_klass);
2874 } else {
2875 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2876 }
2877
2878 #ifdef ASSERT
2879 // assert(src->klass() != NULL);
2880 BLOCK_COMMENT("assert klasses not null");
2881 { Label L_a, L_b;
2882 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2883 __ bind(L_a);
2884 __ stop("broken null klass");
2885 __ bind(L_b);
2886 __ load_klass(dst, G4_dst_klass);
2887 __ br_null(G4_dst_klass, false, Assembler::pn, L_a, false); // this would be broken also
2888 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2889 BLOCK_COMMENT("assert done");
2890 }
2891 #endif
2892
2893 // Load layout helper
2894 //
2895 //   |array_tag|     | header_size | element_type |     |log2_element_size|
2896 //    32        30    24            16             8     2                 0
2897 //
2898 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2899 //
2900
2901 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2902 Klass::layout_helper_offset_in_bytes();
2903
2904 // Load the 32-bit signed value. Use the br() instruction with it to check icc.
2905 __ lduw(G3_src_klass, lh_offset, G5_lh);
2906
2907 if (UseCompressedOops) {
2908 __ load_klass(dst, G4_dst_klass);
2909 }
2910 // Handle objArrays completely differently...
2911 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2912 __ set(objArray_lh, O5_temp);
2913 __ cmp(G5_lh, O5_temp);
2914 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2915 if (UseCompressedOops) {
2916 __ delayed()->nop();
2917 } else {
2918 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2919 }
2920
2921 // if (src->klass() != dst->klass()) return -1;
2922 __ cmp_and_brx(G3_src_klass, G4_dst_klass, Assembler::notEqual, false, Assembler::pn, L_failed);
2923
2924 // if (!src->is_Array()) return -1;
2925 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2926 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2927
2928 // At this point, it is known to be a typeArray (array_tag 0x3).
2929 #ifdef ASSERT
2930 __ delayed()->nop();
2931 { Label L;
2932 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2933 __ set(lh_prim_tag_in_place, O5_temp);
2934 __ cmp(G5_lh, O5_temp);
2935 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2936 __ delayed()->nop();
2937 __ stop("must be a primitive array");
2938 __ bind(L);
2939 }
2940 #else
2941 __ delayed(); // match next insn to prev branch
2942 #endif
2970 BLOCK_COMMENT("scale indexes to element size");
2971 __ sll_ptr(src_pos, G3_elsize, src_pos);
2972 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2973 __ add(src, src_pos, from); // src_addr
2974 __ add(dst, dst_pos, to); // dst_addr
2975
2976 BLOCK_COMMENT("choose copy loop based on element size");
2977 __ cmp(G3_elsize, 0);
2978 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2979 __ delayed()->signx(length, count); // length
2980
2981 __ cmp(G3_elsize, LogBytesPerShort);
2982 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2983 __ delayed()->signx(length, count); // length
2984
2985 __ cmp(G3_elsize, LogBytesPerInt);
2986 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2987 __ delayed()->signx(length, count); // length
2988 #ifdef ASSERT
2989 { Label L;
2990 __ cmp_and_br(G3_elsize, LogBytesPerLong, Assembler::equal, false, Assembler::pt, L);
2991 __ stop("must be long copy, but elsize is wrong");
2992 __ bind(L);
2993 }
2994 #endif
2995 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2996 __ delayed()->signx(length, count); // length
2997
2998 // objArrayKlass
2999 __ BIND(L_objArray);
3000 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3001
3002 Label L_plain_copy, L_checkcast_copy;
3003 // test array classes for subtyping
3004 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3005 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3006 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3007
3008 // Identically typed arrays can be copied without element-wise checks.
3009 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3010 O5_temp, G5_lh, L_failed);
|
133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null_short(t, Assembler::pt, L);
154 __ stop("StubRoutines::call_stub: entered with pending exception");
155 __ bind(L);
156 }
157 #endif
158
159 // create activation frame & allocate space for parameters
160 { const Register t = G3_scratch;
161 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
162 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
163 __ round_to(t, WordsPerLong); // make sure it is a multiple of 2 (in words)
164 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
165 __ neg(t); // negate so it can be used with save
166 __ save(SP, t, SP); // setup new frame
167 }
168
169 // +---------------+ <--- sp + 0
170 // | |
171 // . reg save area .
172 // | |
173 // +---------------+ <--- sp + 0x40
189 // . extra 7 slots .
190 // | |
191 // +---------------+ <--- fp + 0x5c
192 // | param. size |
193 // +---------------+ <--- fp + 0x60
194 // | thread |
195 // +---------------+
196 // | |
197
198 // pass parameters if any
199 BLOCK_COMMENT("pass parameters if any");
200 { const Register src = parameters.as_in().as_register();
201 const Register dst = Lentry_args;
202 const Register tmp = G3_scratch;
203 const Register cnt = G4_scratch;
204
205 // test if any parameters & set up Lentry_args
206 Label exit;
207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
208 __ add( FP, STACK_BIAS, dst );
209 __ cmp_zero_and_br(Assembler::zero, cnt, exit);
210 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
211
212 // copy parameters if any
213 Label loop;
214 __ BIND(loop);
215 // Store parameter value
216 __ ld_ptr(src, 0, tmp);
217 __ add(src, BytesPerWord, src);
218 __ st_ptr(tmp, dst, 0);
219 __ deccc(cnt);
220 __ br(Assembler::greater, false, Assembler::pt, loop);
221 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
222
223 // done
224 __ BIND(exit);
225 }
226
227 // setup parameters, method & call Java function
228 #ifdef ASSERT
229 // layout_activation_impl checks its notion of saved SP against
263 // store result depending on type
264 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
265 // is treated as T_INT)
266 { const Register addr = result .as_in().as_register();
267 const Register type = result_type.as_in().as_register();
268 Label is_long, is_float, is_double, is_object, exit;
269 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
270 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
271 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
272 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
273 __ delayed()->nop();
274
275 // store int result
276 __ st(O0, addr, G0);
277
278 __ BIND(exit);
279 __ ret();
280 __ delayed()->restore();
281
282 __ BIND(is_object);
283 __ ba(exit);
284 __ delayed()->st_ptr(O0, addr, G0);
285
286 __ BIND(is_float);
287 __ ba(exit);
288 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
289
290 __ BIND(is_double);
291 __ ba(exit);
292 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
293
294 __ BIND(is_long);
295 #ifdef _LP64
296 __ ba(exit);
297 __ delayed()->st_long(O0, addr, G0); // store entire long
298 #else
299 #if defined(COMPILER2)
300 // All return values are where we want them, except for Longs. C2 returns
301 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
302 // Since the interpreter will return longs in both G1 and O0/O1 in the 32-bit
303 // build, we simply always use G1.
304 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
305 // do this here. Unfortunately if we did a rethrow we'd see a MachEpilogNode
306 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
307
308 __ ba(exit);
309 __ delayed()->stx(G1, addr, G0); // store entire long
310 #else
311 __ st(O1, addr, BytesPerInt);
312 __ ba(exit);
313 __ delayed()->st(O0, addr, G0);
314 #endif /* COMPILER2 */
315 #endif /* _LP64 */
316 }
317 return start;
318 }
319
320
321 //----------------------------------------------------------------------------------------------------
322 // Return point for a Java call if there's an exception thrown in Java code.
323 // The exception is caught and transformed into a pending exception stored in
324 // JavaThread that can be tested from within the VM.
325 //
326 // Oexception: exception oop
327
328 address generate_catch_exception() {
329 StubCodeMark mark(this, "StubRoutines", "catch_exception");
330
331 address start = __ pc();
332 // verify that thread corresponds
363 //
364 // Contract with Java-level exception handler: O0 = exception
365 // O1 = throwing pc
366
367 address generate_forward_exception() {
368 StubCodeMark mark(this, "StubRoutines", "forward_exception");
369 address start = __ pc();
370
371 // Upon entry, O7 has the return address returning into Java
372 // (interpreted or compiled) code; i.e. the return address
373 // becomes the throwing pc.
374
375 const Register& handler_reg = Gtemp;
376
377 Address exception_addr(G2_thread, Thread::pending_exception_offset());
378
379 #ifdef ASSERT
380 // make sure that this code is only executed if there is a pending exception
381 { Label L;
382 __ ld_ptr(exception_addr, Gtemp);
383 __ br_notnull_short(Gtemp, Assembler::pt, L);
384 __ stop("StubRoutines::forward exception: no pending exception (1)");
385 __ bind(L);
386 }
387 #endif
388
389 // compute exception handler into handler_reg
390 __ get_thread();
391 __ ld_ptr(exception_addr, Oexception);
392 __ verify_oop(Oexception);
393 __ save_frame(0); // compensates for compiler weakness
394 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
395 BLOCK_COMMENT("call exception_handler_for_return_address");
396 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
397 __ mov(O0, handler_reg);
398 __ restore(); // compensates for compiler weakness
399
400 __ ld_ptr(exception_addr, Oexception);
401 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
402
403 #ifdef ASSERT
404 // make sure exception is set
405 { Label L;
406 __ br_notnull_short(Oexception, Assembler::pt, L);
407 __ stop("StubRoutines::forward exception: no pending exception (2)");
408 __ bind(L);
409 }
410 #endif
411 // jump to exception handler
412 __ jmp(handler_reg, 0);
413 // clear pending exception
414 __ delayed()->st_ptr(G0, exception_addr);
415
416 return start;
417 }
418
419
420 //------------------------------------------------------------------------------------------------------------------------
421 // Continuation point for throwing of implicit exceptions that are not handled in
422 // the current activation. Fabricates an exception oop and initiates normal
423 // exception dispatching in this frame. Only callee-saved registers are preserved
424 // (through the normal register window / RegisterMap handling).
425 // If the compiler needs all registers to be preserved between the fault
426 // point and the exception handler then it must assume responsibility for that in
480 if (arg2 != noreg) {
481 __ mov(arg2, O2);
482 }
483 // do the call
484 BLOCK_COMMENT("call runtime_entry");
485 __ call(runtime_entry, relocInfo::runtime_call_type);
486 if (!VerifyThread)
487 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
488 else
489 __ delayed()->nop(); // (thread already passed)
490 __ restore_thread(noreg);
491 __ reset_last_Java_frame();
492
493 // check for pending exceptions. use Gtemp as scratch register.
494 #ifdef ASSERT
495 Label L;
496
497 Address exception_addr(G2_thread, Thread::pending_exception_offset());
498 Register scratch_reg = Gtemp;
499 __ ld_ptr(exception_addr, scratch_reg);
500 __ br_notnull_short(scratch_reg, Assembler::pt, L);
501 __ should_not_reach_here();
502 __ bind(L);
503 #endif // ASSERT
504 BLOCK_COMMENT("call forward_exception_entry");
505 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
506 // we use O7 linkage so that forward_exception_entry has the issuing PC
507 __ delayed()->restore();
508
509 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
510 return stub->entry_point();
511 }
512
513 #undef __
514 #define __ _masm->
515
516
517 // Generate a routine that sets all the registers so we
518 // can tell if the stop routine prints them correctly.
519 address generate_test_stop() {
520 StubCodeMark mark(this, "StubRoutines", "test_stop");
592 if (mark_oop_reg == noreg) {
593 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
594 __ set((intptr_t)lock_ptr, lock_ptr_reg);
595 } else {
596 assert(scratch_reg != noreg, "just checking");
597 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
598 __ set((intptr_t)lock_ptr, lock_ptr_reg);
599 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
600 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
601 }
602 }
603
604 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
605
606 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
607 __ set(StubRoutines::Sparc::locked, lock_reg);
608 // Initialize yield counter
609 __ mov(G0,yield_reg);
610
611 __ BIND(retry);
612 __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
613
614 // This code can only be called from inside the VM; this
615 // stub is only invoked from Atomic::add(). We do not
616 // want to use call_VM, because _last_java_sp and such
617 // must already be set.
618 //
619 // Save the regs and make space for a C call
620 __ save(SP, -96, SP);
621 __ save_all_globals_into_locals();
622 BLOCK_COMMENT("call os::naked_sleep");
623 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
624 __ delayed()->nop();
625 __ restore_globals_from_locals();
626 __ restore();
627 // reset the counter
628 __ mov(G0,yield_reg);
629
630 __ BIND(dontyield);
631
632 // try to get lock
652 // dest: O1
653 //
654 // Results:
655 //
656 // O0: the value previously stored in dest
657 //
658 address generate_atomic_xchg() {
659 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
660 address start = __ pc();
661
662 if (UseCASForSwap) {
663 // Use CAS instead of swap, just in case the MP hardware
664 // prefers to work with just one kind of synch. instruction.
665 Label retry;
666 __ BIND(retry);
667 __ mov(O0, O3); // scratch copy of exchange value
668 __ ld(O1, 0, O2); // observe the previous value
669 // try to replace O2 with O3
670 __ cas_under_lock(O1, O2, O3,
671 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
672 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
673
674 __ retl(false);
675 __ delayed()->mov(O2, O0); // report previous value to caller
676
677 } else {
678 if (VM_Version::v9_instructions_work()) {
679 __ retl(false);
680 __ delayed()->swap(O1, 0, O0);
681 } else {
682 const Register& lock_reg = O2;
683 const Register& lock_ptr_reg = O3;
684 const Register& yield_reg = O4;
685
686 Label retry;
687 Label dontyield;
688
689 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
690 // got the lock, do the swap
691 __ swap(O1, 0, O0);
692
774 //
775 // Results:
776 //
777 // O0: the new value stored in dest
778 //
779 // Overwrites (v9): O3
780 // Overwrites (v8): O3,O4,O5
781 //
782 address generate_atomic_add() {
783 StubCodeMark mark(this, "StubRoutines", "atomic_add");
784 address start = __ pc();
785 __ BIND(_atomic_add_stub);
786
787 if (VM_Version::v9_instructions_work()) {
788 Label retry;
789 __ BIND(retry);
790
791 __ lduw(O1, 0, O2);
792 __ add(O0, O2, O3);
793 __ cas(O1, O2, O3);
794 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
795 __ retl(false);
796 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
797 } else {
798 const Register& lock_reg = O2;
799 const Register& lock_ptr_reg = O3;
800 const Register& value_reg = O4;
801 const Register& yield_reg = O5;
802
803 Label retry;
804 Label dontyield;
805
806 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
807 // got lock, do the increment
808 __ ld(O1, 0, value_reg);
809 __ add(O0, value_reg, value_reg);
810 __ st(value_reg, O1, 0);
811
812 // %%% only for RMO and PSO
813 __ membar(Assembler::StoreStore);
814
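
On v9 this is the classic fetch-and-add built from cas; the v8 path falls back to the software lock, with the StoreStore membar needed only under the RMO/PSO memory models. The v9 loop in portable C++, as a sketch:

    #include <atomic>

    // Hedged sketch of the v9 path: lduw/add/cas/retry; per the contract
    // above, the stub returns the new value stored in dest.
    int cas_based_add(std::atomic<int>& dest, int add_value) {
        int old = dest.load();                            // lduw(O1, 0, O2)
        while (!dest.compare_exchange_weak(old, old + add_value)) {
            // on failure, `old` is refreshed; retry
        }
        return old + add_value;                           // delayed()->add(O0, O2, O0)
    }
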
1342 if (!aligned)
1343 #endif
1344 {
1345 // Copy with shift 16 bytes per iteration if arrays do not have
1346 // the same alignment mod 8, otherwise fall through to the next
1347 // code for aligned copy.
1348 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1349 // Also jump over aligned copy after the copy with shift completed.
1350
1351 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1352 }
1353
1354 // Both arrays are 8-byte aligned, copy 16 bytes at a time
1355 __ and3(count, 7, G4); // Save count
1356 __ srl(count, 3, count);
1357 generate_disjoint_long_copy_core(aligned);
1358 __ mov(G4, count); // Restore count
1359
1360 // copy trailing bytes
1361 __ BIND(L_copy_byte);
1362 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1363 __ align(OptoLoopAlignment);
1364 __ BIND(L_copy_byte_loop);
1365 __ ldub(from, offset, O3);
1366 __ deccc(count);
1367 __ stb(O3, to, offset);
1368 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1369 __ delayed()->inc(offset);
1370
1371 __ BIND(L_exit);
1372 // O3, O4 are used as temp registers
1373 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1374 __ retl();
1375 __ delayed()->mov(G0, O0); // return 0
1376 return start;
1377 }
1378
1379 //
1380 // Generate stub for conjoint byte copy. If "aligned" is true, the
1381 // "from" and "to" addresses are assumed to be heapword aligned.
1382 //
1453 // Also jump over aligned copy after the copy with shift completed.
1454
1455 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1456 L_aligned_copy, L_copy_byte);
1457 }
1458 // copy 16 elements (16 bytes) at a time
1459 __ align(OptoLoopAlignment);
1460 __ BIND(L_aligned_copy);
1461 __ dec(end_from, 16);
1462 __ ldx(end_from, 8, O3);
1463 __ ldx(end_from, 0, O4);
1464 __ dec(end_to, 16);
1465 __ deccc(count, 16);
1466 __ stx(O3, end_to, 8);
1467 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1468 __ delayed()->stx(O4, end_to, 0);
1469 __ inc(count, 16);
1470
1471 // copy 1 element (1 byte) at a time
1472 __ BIND(L_copy_byte);
1473 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1474 __ align(OptoLoopAlignment);
1475 __ BIND(L_copy_byte_loop);
1476 __ dec(end_from);
1477 __ dec(end_to);
1478 __ ldub(end_from, 0, O4);
1479 __ deccc(count);
1480 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1481 __ delayed()->stb(O4, end_to, 0);
1482
1483 __ BIND(L_exit);
1484 // O3, O4 are used as temp registers
1485 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1486 __ retl();
1487 __ delayed()->mov(G0, O0); // return 0
1488 return start;
1489 }
1490
1491 //
1492 // Generate stub for disjoint short copy. If "aligned" is true, the
1493 // "from" and "to" addresses are assumed to be heapword aligned.
1570 if (!aligned)
1571 #endif
1572 {
1573 // Copy with shift 16 bytes per iteration if arrays do not have
1574 // the same alignment mod 8, otherwise fall through to the next
1575 // code for aligned copy.
1576 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1577 // Also jump over aligned copy after the copy with shift completed.
1578
1579 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1580 }
1581
1582 // Both arrays are 8-byte aligned, copy 16 bytes at a time
1583 __ and3(count, 3, G4); // Save
1584 __ srl(count, 2, count);
1585 generate_disjoint_long_copy_core(aligned);
1586 __ mov(G4, count); // restore
1587
1588 // copy 1 element at a time
1589 __ BIND(L_copy_2_bytes);
1590 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1591 __ align(OptoLoopAlignment);
1592 __ BIND(L_copy_2_bytes_loop);
1593 __ lduh(from, offset, O3);
1594 __ deccc(count);
1595 __ sth(O3, to, offset);
1596 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1597 __ delayed()->inc(offset, 2);
1598
1599 __ BIND(L_exit);
1600 // O3, O4 are used as temp registers
1601 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1602 __ retl();
1603 __ delayed()->mov(G0, O0); // return 0
1604 return start;
1605 }
1606
1607 //
1608 // Generate stub for disjoint short fill. If "aligned" is true, the
1609 // "to" address is assumed to be heapword aligned.
1610 //
1915 // Also jump over aligned copy after the copy with shift completed.
1916
1917 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1918 L_aligned_copy, L_copy_2_bytes);
1919 }
1920 // copy 8 elements (16 bytes) at a time
1921 __ align(OptoLoopAlignment);
1922 __ BIND(L_aligned_copy);
1923 __ dec(end_from, 16);
1924 __ ldx(end_from, 8, O3);
1925 __ ldx(end_from, 0, O4);
1926 __ dec(end_to, 16);
1927 __ deccc(count, 8);
1928 __ stx(O3, end_to, 8);
1929 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1930 __ delayed()->stx(O4, end_to, 0);
1931 __ inc(count, 8);
1932
1933 // copy 1 element (2 bytes) at a time
1934 __ BIND(L_copy_2_bytes);
1935 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1936 __ BIND(L_copy_2_bytes_loop);
1937 __ dec(end_from, 2);
1938 __ dec(end_to, 2);
1939 __ lduh(end_from, 0, O4);
1940 __ deccc(count);
1941 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1942 __ delayed()->sth(O4, end_to, 0);
1943
1944 __ BIND(L_exit);
1945 // O3, O4 are used as temp registers
1946 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1947 __ retl();
1948 __ delayed()->mov(G0, O0); // return 0
1949 return start;
1950 }
1951
1952 //
1953 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1954 // If "aligned" is true, the "from" and "to" addresses are assumed
1955 // to be heapword aligned.
2028 __ sllx(O4, 32, O4);
2029 __ srlx(G4, 32, G3);
2030 __ bset(G3, O4);
2031 __ stx(O4, to, -8);
2032 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2033 __ delayed()->mov(G4, O3);
2034
2035 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2036 __ delayed()->inc(count, 4); // restore 'count'
2037
2038 __ BIND(L_aligned_copy);
2039 }
2040 // copy 4 elements (16 bytes) at a time
2041 __ and3(count, 1, G4); // Save
2042 __ srl(count, 1, count);
2043 generate_disjoint_long_copy_core(aligned);
2044 __ mov(G4, count); // Restore
2045
2046 // copy 1 element at a time
2047 __ BIND(L_copy_4_bytes);
2048 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2049 __ BIND(L_copy_4_bytes_loop);
2050 __ ld(from, offset, O3);
2051 __ deccc(count);
2052 __ st(O3, to, offset);
2053 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2054 __ delayed()->inc(offset, 4);
2055 __ BIND(L_exit);
2056 }
2057
2058 //
2059 // Generate stub for disjoint int copy. If "aligned" is true, the
2060 // "from" and "to" addresses are assumed to be heapword aligned.
2061 //
2062 // Arguments for generated stub:
2063 // from: O0
2064 // to: O1
2065 // count: O2 treated as signed
2066 //
2067 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2068 __ align(CodeEntryAlignment);
2160 __ delayed()->mov(O5, O3);
2161
2162 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2163 __ delayed()->inc(count, 4);
2164
2165 // copy 4 elements (16 bytes) at a time
2166 __ align(OptoLoopAlignment);
2167 __ BIND(L_aligned_copy);
2168 __ dec(end_from, 16);
2169 __ ldx(end_from, 8, O3);
2170 __ ldx(end_from, 0, O4);
2171 __ dec(end_to, 16);
2172 __ deccc(count, 4);
2173 __ stx(O3, end_to, 8);
2174 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2175 __ delayed()->stx(O4, end_to, 0);
2176 __ inc(count, 4);
2177
2178 // copy 1 element (4 bytes) at a time
2179 __ BIND(L_copy_4_bytes);
2180 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2181 __ BIND(L_copy_4_bytes_loop);
2182 __ dec(end_from, 4);
2183 __ dec(end_to, 4);
2184 __ ld(end_from, 0, O4);
2185 __ deccc(count);
2186 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2187 __ delayed()->st(O4, end_to, 0);
2188 __ BIND(L_exit);
2189 }
2190
2191 //
2192 // Generate stub for conjoint int copy. If "aligned" is true, the
2193 // "from" and "to" addresses are assumed to be heapword aligned.
2194 //
2195 // Arguments for generated stub:
2196 // from: O0
2197 // to: O1
2198 // count: O2 treated as signed
2199 //
2200 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2542 Register temp,
2543 Label& L_success) {
2544 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2545
2546 BLOCK_COMMENT("type_check:");
2547
2548 Label L_miss, L_pop_to_miss;
2549
2550 assert_clean_int(super_check_offset, temp);
2551
2552 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2553 &L_success, &L_miss, NULL,
2554 super_check_offset);
2555
2556 BLOCK_COMMENT("type_check_slow_path:");
2557 __ save_frame(0);
2558 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2559 super_klass->after_save(),
2560 L0, L1, L2, L4,
2561 NULL, &L_pop_to_miss);
2562 __ ba(L_success);
2563 __ delayed()->restore();
2564
2565 __ bind(L_pop_to_miss);
2566 __ restore();
2567
2568 // Fall through on failure!
2569 __ BIND(L_miss);
2570 }
2571
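
generate_type_check is the usual two-level subtype test: a fast probe of the subclass at super_check_offset, then a slow scan of the secondary supers. A hedged C++ rendering of the scheme (field names and layout are illustrative stand-ins for Klass; the real fast path can also rule a type out immediately when the offset addresses the primary supers array):

    #include <cstddef>

    // Hedged sketch of the cached-offset subtype check; not the real Klass layout.
    struct KlassSketch {
        KlassSketch** secondary_supers;      // null-terminated here for simplicity
        size_t        super_check_offset;    // byte offset to probe in subclasses
    };

    bool is_subtype_of(KlassSketch* sub, KlassSketch* super) {
        // fast path: load the word at super's check offset inside sub and compare
        KlassSketch* probed = *reinterpret_cast<KlassSketch**>(
            reinterpret_cast<char*>(sub) + super->super_check_offset);
        if (probed == super) return true;    // hit in the primary array or the cache
        // slow path: linear scan of the secondary supers list
        for (KlassSketch** p = sub->secondary_supers; *p != nullptr; ++p) {
            if (*p == super) return true;    // the real code also updates the cache
        }
        return false;
    }
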
2572
2573 // Generate stub for checked oop copy.
2574 //
2575 // Arguments for generated stub:
2576 // from: O0
2577 // to: O1
2578 // count: O2 treated as signed
2579 // ckoff: O3 (super_check_offset)
2580 // ckval: O4 (super_klass)
2581 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2582 //
2639 __ delayed()->set(0, O0); // return 0 on (trivial) success
2640
2641 // ======== begin loop ========
2642 // (Loop is rotated; its entry is load_element.)
2643 // Loop variables:
2644 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2645 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2646 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2647 __ align(OptoLoopAlignment);
2648
2649 __ BIND(store_element);
2650 __ deccc(G1_remain); // decrement the count
2651 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2652 __ inc(O5_offset, heapOopSize); // step to next offset
2653 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2654 __ delayed()->set(0, O0); // return 0 on success
2655
2656 // ======== loop entry is here ========
2657 __ BIND(load_element);
2658 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2659 __ br_null_short(G3_oop, Assembler::pt, store_element);
2660
2661 __ load_klass(G3_oop, G4_klass); // query the object klass
2662
2663 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2664 // branch to this on success:
2665 store_element);
2666 // ======== end loop ========
2667
2668 // It was a real error; we must depend on the caller to finish the job.
2669 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2670 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2671 // and report their number to the caller.
2672 __ BIND(fail);
2673 __ subcc(O2_count, G1_remain, O2_count);
2674 __ brx(Assembler::zero, false, Assembler::pt, done);
2675 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2676
2677 __ BIND(do_card_marks);
2678 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2679
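
The ret contract above (zero for success, -1^K after K elements) packs the failure report into one value: -1^K is just ~K. The convention in C++, as a caller would use it (a sketch of the encoding only):

    #include <cstdint>

    // 0 means everything was copied; anything else decodes to K, the
    // number of elements transferred before the failing element.
    int32_t encode_partial_transfer(int32_t K) { return ~K; }       // -1 ^ K

    int32_t elements_copied(int32_t ret, int32_t total) {
        return (ret == 0) ? total : ~ret;                           // ~(~K) == K
    }
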
2861 __ delayed()->tst(dst_pos);
2862 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2863
2864 // if (length < 0) return -1;
2865 __ delayed()->tst(length);
2866 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2867
2868 BLOCK_COMMENT("arraycopy argument klass checks");
2869 // get src->klass()
2870 if (UseCompressedOops) {
2871 __ delayed()->nop(); // ??? not good
2872 __ load_klass(src, G3_src_klass);
2873 } else {
2874 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2875 }
2876
2877 #ifdef ASSERT
2878 // assert(src->klass() != NULL);
2879 BLOCK_COMMENT("assert klasses not null");
2880 { Label L_a, L_b;
2881 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
2882 __ bind(L_a);
2883 __ stop("broken null klass");
2884 __ bind(L_b);
2885 __ load_klass(dst, G4_dst_klass);
2886 __ br_null(G4_dst_klass, false, Assembler::pn, L_a, false); // this would be broken also
2887 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2888 BLOCK_COMMENT("assert done");
2889 }
2890 #endif
2891
2892 // Load layout helper
2893 //
2894 //   |array_tag|     | header_size | element_type |     |log2_element_size|
2895 //    32        30    24            16             8     2                 0
2896 //
2897 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2898 //
2899
2900 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2901 Klass::layout_helper_offset_in_bytes();
2902
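
The layout helper packs the array tag, header size, element type, and log2 element size into one jint, which is why the stub can both compare it wholesale (the objArray check below) and extract fields from it later. A hedged decode per the bit diagram above (shifts and masks are read off the diagram, not taken from the real Klass constants):

    #include <cstdint>

    // Hedged sketch: field extraction per the diagram; assumptions, not Klass.
    struct LayoutHelperFields {
        int tag;                 // 0x3 typeArray, 0x2 objArray, 0x0 non-array
        int header_size;         // in bytes
        int element_type;        // BasicType of the elements
        int log2_element_size;
    };

    LayoutHelperFields decode_layout_helper(int32_t lh) {
        LayoutHelperFields f;
        f.tag               = (lh >> 30) & 0x3;
        f.header_size       = (lh >> 16) & 0xff;
        f.element_type      = (lh >> 8)  & 0xff;
        f.log2_element_size =  lh        & 0x3f;   // assumed mask for the low field
        return f;
    }
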
2903 // Load the 32-bit signed value. Use the br() instruction with it to check icc.
2904 __ lduw(G3_src_klass, lh_offset, G5_lh);
2905
2906 if (UseCompressedOops) {
2907 __ load_klass(dst, G4_dst_klass);
2908 }
2909 // Handle objArrays completely differently...
2910 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2911 __ set(objArray_lh, O5_temp);
2912 __ cmp(G5_lh, O5_temp);
2913 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2914 if (UseCompressedOops) {
2915 __ delayed()->nop();
2916 } else {
2917 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2918 }
2919
2920 // if (src->klass() != dst->klass()) return -1;
2921 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
2922
2923 // if (!src->is_Array()) return -1;
2924 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2925 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2926
2927 // At this point, it is known to be a typeArray (array_tag 0x3).
2928 #ifdef ASSERT
2929 __ delayed()->nop();
2930 { Label L;
2931 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2932 __ set(lh_prim_tag_in_place, O5_temp);
2933 __ cmp(G5_lh, O5_temp);
2934 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2935 __ delayed()->nop();
2936 __ stop("must be a primitive array");
2937 __ bind(L);
2938 }
2939 #else
2940 __ delayed(); // match next insn to prev branch
2941 #endif
2969 BLOCK_COMMENT("scale indexes to element size");
2970 __ sll_ptr(src_pos, G3_elsize, src_pos);
2971 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2972 __ add(src, src_pos, from); // src_addr
2973 __ add(dst, dst_pos, to); // dst_addr
2974
2975 BLOCK_COMMENT("choose copy loop based on element size");
2976 __ cmp(G3_elsize, 0);
2977 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2978 __ delayed()->signx(length, count); // length
2979
2980 __ cmp(G3_elsize, LogBytesPerShort);
2981 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2982 __ delayed()->signx(length, count); // length
2983
2984 __ cmp(G3_elsize, LogBytesPerInt);
2985 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2986 __ delayed()->signx(length, count); // length
2987 #ifdef ASSERT
2988 { Label L;
2989 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
2990 __ stop("must be long copy, but elsize is wrong");
2991 __ bind(L);
2992 }
2993 #endif
2994 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2995 __ delayed()->signx(length, count); // length
2996
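
Every element width funnels into the same copy loops: the stub only scales the positions by log2(element size) and adds them to bases that were advanced past the array header in the elided lines above. In C++ terms (a sketch of the address computation only):

    #include <cstdint>

    // Hedged sketch: compute the copy operands as the stub does, given bases
    // that already point at element 0 of each array.
    void scale_and_dispatch(char* src_base, char* dst_base,
                            int64_t src_pos, int64_t dst_pos,
                            int64_t length, int log2_esize,
                            char** from, char** to, int64_t* count) {
        *from  = src_base + (src_pos << log2_esize);   // sll_ptr + add
        *to    = dst_base + (dst_pos << log2_esize);
        *count = length;                               // signx(length, count)
        // the stub then branches to the byte/short/int/long loop
        // according to log2_esize (0, 1, 2, or 3)
    }
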
2997 // objArrayKlass
2998 __ BIND(L_objArray);
2999 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3000
3001 Label L_plain_copy, L_checkcast_copy;
3002 // test array classes for subtyping
3003 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3004 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3005 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3006
3007 // Identically typed arrays can be copied without element-wise checks.
3008 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3009 O5_temp, G5_lh, L_failed);
|