133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null(t, false, Assembler::pt, L);
154 __ delayed()->nop();
155 __ stop("StubRoutines::call_stub: entered with pending exception");
156 __ bind(L);
157 }
158 #endif
159
160 // create activation frame & allocate space for parameters
161 { const Register t = G3_scratch;
162 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
163 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
164 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
165 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
166 __ neg(t); // negate so it can be used with save
167 __ save(SP, t, SP); // setup new frame
168 }
169
170 // +---------------+ <--- sp + 0
171 // | |
172 // . reg save area .
173 // | |
174 // +---------------+ <--- sp + 0x40
190 // . extra 7 slots .
191 // | |
192 // +---------------+ <--- fp + 0x5c
193 // | param. size |
194 // +---------------+ <--- fp + 0x60
195 // | thread |
196 // +---------------+
197 // | |
198
199 // pass parameters if any
200 BLOCK_COMMENT("pass parameters if any");
201 { const Register src = parameters.as_in().as_register();
202 const Register dst = Lentry_args;
203 const Register tmp = G3_scratch;
204 const Register cnt = G4_scratch;
205
206 // test if any parameters & setup of Lentry_args
207 Label exit;
208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
209 __ add( FP, STACK_BIAS, dst );
210 __ tst(cnt);
211 __ br(Assembler::zero, false, Assembler::pn, exit);
212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
213
214 // copy parameters if any
215 Label loop;
216 __ BIND(loop);
217 // Store parameter value
218 __ ld_ptr(src, 0, tmp);
219 __ add(src, BytesPerWord, src);
220 __ st_ptr(tmp, dst, 0);
221 __ deccc(cnt);
222 __ br(Assembler::greater, false, Assembler::pt, loop);
223 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
224
225 // done
226 __ BIND(exit);
227 }
228
229 // setup parameters, method & call Java function
230 #ifdef ASSERT
231 // layout_activation_impl checks its notion of saved SP against
265 // store result depending on type
266 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
267 // is treated as T_INT)
268 { const Register addr = result .as_in().as_register();
269 const Register type = result_type.as_in().as_register();
270 Label is_long, is_float, is_double, is_object, exit;
271 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
272 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
273 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
274 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
275 __ delayed()->nop();
276
277 // store int result
278 __ st(O0, addr, G0);
279
280 __ BIND(exit);
281 __ ret();
282 __ delayed()->restore();
283
284 __ BIND(is_object);
285 __ ba(false, exit);
286 __ delayed()->st_ptr(O0, addr, G0);
287
288 __ BIND(is_float);
289 __ ba(false, exit);
290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
291
292 __ BIND(is_double);
293 __ ba(false, exit);
294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
295
296 __ BIND(is_long);
297 #ifdef _LP64
298 __ ba(false, exit);
299 __ delayed()->st_long(O0, addr, G0); // store entire long
300 #else
301 #if defined(COMPILER2)
302 // All return values are where we want them, except for Longs. C2 returns
303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
304 // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
305 // build we simply always use G1.
306 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
307 // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
308 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
309
310 __ ba(false, exit);
311 __ delayed()->stx(G1, addr, G0); // store entire long
312 #else
313 __ st(O1, addr, BytesPerInt);
314 __ ba(false, exit);
315 __ delayed()->st(O0, addr, G0);
316 #endif /* COMPILER2 */
317 #endif /* _LP64 */
318 }
319 return start;
320 }
321
322
323 //----------------------------------------------------------------------------------------------------
324 // Return point for a Java call if there's an exception thrown in Java code.
325 // The exception is caught and transformed into a pending exception stored in
326 // JavaThread that can be tested from within the VM.
327 //
328 // Oexception: exception oop
329
330 address generate_catch_exception() {
331 StubCodeMark mark(this, "StubRoutines", "catch_exception");
332
333 address start = __ pc();
334 // verify that thread corresponds
365 //
366 // Contract with Java-level exception handler: O0 = exception
367 // O1 = throwing pc
368
369 address generate_forward_exception() { // forwards the pending exception to the Java-level handler for the return address in O7
370 StubCodeMark mark(this, "StubRoutines", "forward_exception");
371 address start = __ pc(); // stub entry point returned to the caller
372
373 // Upon entry, O7 has the return address returning into Java
374 // (interpreted or compiled) code; i.e. the return address
375 // becomes the throwing pc.
376
377 const Register& handler_reg = Gtemp; // will hold the computed exception handler address
378
379 Address exception_addr(G2_thread, Thread::pending_exception_offset()); // JavaThread::_pending_exception slot
380
381 #ifdef ASSERT
382 // make sure that this code is only executed if there is a pending exception
383 { Label L;
384 __ ld_ptr(exception_addr, Gtemp); // load pending exception oop for the null check
385 __ br_notnull(Gtemp, false, Assembler::pt, L);
386 __ delayed()->nop();
387 __ stop("StubRoutines::forward exception: no pending exception (1)");
388 __ bind(L);
389 }
390 #endif
391
392 // compute exception handler into handler_reg
393 __ get_thread(); // (re)establish G2_thread as the current thread before the runtime call
394 __ ld_ptr(exception_addr, Oexception); // Oexception := pending exception oop
395 __ verify_oop(Oexception);
396 __ save_frame(0); // compensates for compiler weakness
397 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
398 BLOCK_COMMENT("call exception_handler_for_return_address");
399 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch); // handler pc comes back in O0
400 __ mov(O0, handler_reg); // stash handler address; survives the restore below
401 __ restore(); // compensates for compiler weakness
402
403 __ ld_ptr(exception_addr, Oexception); // reload exception oop (still pending; cleared only below)
404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
405
406 #ifdef ASSERT
407 // make sure exception is set
408 { Label L;
409 __ br_notnull(Oexception, false, Assembler::pt, L);
410 __ delayed()->nop();
411 __ stop("StubRoutines::forward exception: no pending exception (2)");
412 __ bind(L);
413 }
414 #endif
415 // jump to exception handler
416 __ jmp(handler_reg, 0);
417 // clear pending exception
418 __ delayed()->st_ptr(G0, exception_addr); // store of G0 (null) happens in the branch delay slot
419
420 return start;
421 }
422
423
424 //------------------------------------------------------------------------------------------------------------------------
425 // Continuation point for throwing of implicit exceptions that are not handled in
426 // the current activation. Fabricates an exception oop and initiates normal
427 // exception dispatching in this frame. Only callee-saved registers are preserved
428 // (through the normal register window / RegisterMap handling).
429 // If the compiler needs all registers to be preserved between the fault
430 // point and the exception handler then it must assume responsibility for that in
484 if (arg2 != noreg) {
485 __ mov(arg2, O2);
486 }
487 // do the call
488 BLOCK_COMMENT("call runtime_entry");
489 __ call(runtime_entry, relocInfo::runtime_call_type);
490 if (!VerifyThread)
491 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
492 else
493 __ delayed()->nop(); // (thread already passed)
494 __ restore_thread(noreg);
495 __ reset_last_Java_frame();
496
497 // check for pending exceptions. use Gtemp as scratch register.
498 #ifdef ASSERT
499 Label L;
500
501 Address exception_addr(G2_thread, Thread::pending_exception_offset());
502 Register scratch_reg = Gtemp;
503 __ ld_ptr(exception_addr, scratch_reg);
504 __ br_notnull(scratch_reg, false, Assembler::pt, L);
505 __ delayed()->nop();
506 __ should_not_reach_here();
507 __ bind(L);
508 #endif // ASSERT
509 BLOCK_COMMENT("call forward_exception_entry");
510 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
511 // we use O7 linkage so that forward_exception_entry has the issuing PC
512 __ delayed()->restore();
513
514 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
515 return stub->entry_point();
516 }
517
518 #undef __
519 #define __ _masm->
520
521
522 // Generate a routine that sets all the registers so we
523 // can tell if the stop routine prints them correctly.
524 address generate_test_stop() {
525 StubCodeMark mark(this, "StubRoutines", "test_stop");
597 if (mark_oop_reg == noreg) {
598 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
599 __ set((intptr_t)lock_ptr, lock_ptr_reg);
600 } else {
601 assert(scratch_reg != noreg, "just checking");
602 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
603 __ set((intptr_t)lock_ptr, lock_ptr_reg);
604 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
605 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
606 }
607 }
608
609 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
610
611 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
612 __ set(StubRoutines::Sparc::locked, lock_reg);
613 // Initialize yield counter
614 __ mov(G0,yield_reg);
615
616 __ BIND(retry);
617 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
618 __ br(Assembler::less, false, Assembler::pt, dontyield);
619 __ delayed()->nop();
620
621 // This code can only be called from inside the VM, this
622 // stub is only invoked from Atomic::add(). We do not
623 // want to use call_VM, because _last_java_sp and such
624 // must already be set.
625 //
626 // Save the regs and make space for a C call
627 __ save(SP, -96, SP);
628 __ save_all_globals_into_locals();
629 BLOCK_COMMENT("call os::naked_sleep");
630 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
631 __ delayed()->nop();
632 __ restore_globals_from_locals();
633 __ restore();
634 // reset the counter
635 __ mov(G0,yield_reg);
636
637 __ BIND(dontyield);
638
639 // try to get lock
659 // dest: O1
660 //
661 // Results:
662 //
663 // O0: the value previously stored in dest
664 //
665 address generate_atomic_xchg() {
666 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
667 address start = __ pc();
668
669 if (UseCASForSwap) {
670 // Use CAS instead of swap, just in case the MP hardware
671 // prefers to work with just one kind of synch. instruction.
672 Label retry;
673 __ BIND(retry);
674 __ mov(O0, O3); // scratch copy of exchange value
675 __ ld(O1, 0, O2); // observe the previous value
676 // try to replace O2 with O3
677 __ cas_under_lock(O1, O2, O3,
678 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
679 __ cmp(O2, O3);
680 __ br(Assembler::notEqual, false, Assembler::pn, retry);
681 __ delayed()->nop();
682
683 __ retl(false);
684 __ delayed()->mov(O2, O0); // report previous value to caller
685
686 } else {
687 if (VM_Version::v9_instructions_work()) {
688 __ retl(false);
689 __ delayed()->swap(O1, 0, O0);
690 } else {
691 const Register& lock_reg = O2;
692 const Register& lock_ptr_reg = O3;
693 const Register& yield_reg = O4;
694
695 Label retry;
696 Label dontyield;
697
698 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
699 // got the lock, do the swap
700 __ swap(O1, 0, O0);
701
783 //
784 // Results:
785 //
786 // O0: the new value stored in dest
787 //
788 // Overwrites (v9): O3
789 // Overwrites (v8): O3,O4,O5
790 //
791 address generate_atomic_add() {
792 StubCodeMark mark(this, "StubRoutines", "atomic_add");
793 address start = __ pc();
794 __ BIND(_atomic_add_stub);
795
796 if (VM_Version::v9_instructions_work()) {
797 Label(retry);
798 __ BIND(retry);
799
800 __ lduw(O1, 0, O2);
801 __ add(O0, O2, O3);
802 __ cas(O1, O2, O3);
803 __ cmp( O2, O3);
804 __ br(Assembler::notEqual, false, Assembler::pn, retry);
805 __ delayed()->nop();
806 __ retl(false);
807 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
808 } else {
809 const Register& lock_reg = O2;
810 const Register& lock_ptr_reg = O3;
811 const Register& value_reg = O4;
812 const Register& yield_reg = O5;
813
814 Label(retry);
815 Label(dontyield);
816
817 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
818 // got lock, do the increment
819 __ ld(O1, 0, value_reg);
820 __ add(O0, value_reg, value_reg);
821 __ st(value_reg, O1, 0);
822
823 // %%% only for RMO and PSO
824 __ membar(Assembler::StoreStore);
825
1353 if (!aligned)
1354 #endif
1355 {
1356 // Copy with shift 16 bytes per iteration if arrays do not have
1357 // the same alignment mod 8, otherwise fall through to the next
1358 // code for aligned copy.
1359 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1360 // Also jump over aligned copy after the copy with shift completed.
1361
1362 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1363 }
1364
1365 // Both array are 8 bytes aligned, copy 16 bytes at a time
1366 __ and3(count, 7, G4); // Save count
1367 __ srl(count, 3, count);
1368 generate_disjoint_long_copy_core(aligned);
1369 __ mov(G4, count); // Restore count
1370
1371 // copy tailing bytes
1372 __ BIND(L_copy_byte);
1373 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1374 __ delayed()->nop();
1375 __ align(OptoLoopAlignment);
1376 __ BIND(L_copy_byte_loop);
1377 __ ldub(from, offset, O3);
1378 __ deccc(count);
1379 __ stb(O3, to, offset);
1380 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1381 __ delayed()->inc(offset);
1382
1383 __ BIND(L_exit);
1384 // O3, O4 are used as temp registers
1385 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1386 __ retl();
1387 __ delayed()->mov(G0, O0); // return 0
1388 return start;
1389 }
1390
1391 //
1392 // Generate stub for conjoint byte copy. If "aligned" is true, the
1393 // "from" and "to" addresses are assumed to be heapword aligned.
1394 //
1465 // Also jump over aligned copy after the copy with shift completed.
1466
1467 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1468 L_aligned_copy, L_copy_byte);
1469 }
1470 // copy 4 elements (16 bytes) at a time
1471 __ align(OptoLoopAlignment);
1472 __ BIND(L_aligned_copy);
1473 __ dec(end_from, 16);
1474 __ ldx(end_from, 8, O3);
1475 __ ldx(end_from, 0, O4);
1476 __ dec(end_to, 16);
1477 __ deccc(count, 16);
1478 __ stx(O3, end_to, 8);
1479 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1480 __ delayed()->stx(O4, end_to, 0);
1481 __ inc(count, 16);
1482
1483 // copy 1 element (2 bytes) at a time
1484 __ BIND(L_copy_byte);
1485 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1486 __ delayed()->nop();
1487 __ align(OptoLoopAlignment);
1488 __ BIND(L_copy_byte_loop);
1489 __ dec(end_from);
1490 __ dec(end_to);
1491 __ ldub(end_from, 0, O4);
1492 __ deccc(count);
1493 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1494 __ delayed()->stb(O4, end_to, 0);
1495
1496 __ BIND(L_exit);
1497 // O3, O4 are used as temp registers
1498 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1499 __ retl();
1500 __ delayed()->mov(G0, O0); // return 0
1501 return start;
1502 }
1503
1504 //
1505 // Generate stub for disjoint short copy. If "aligned" is true, the
1506 // "from" and "to" addresses are assumed to be heapword aligned.
1583 if (!aligned)
1584 #endif
1585 {
1586 // Copy with shift 16 bytes per iteration if arrays do not have
1587 // the same alignment mod 8, otherwise fall through to the next
1588 // code for aligned copy.
1589 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1590 // Also jump over aligned copy after the copy with shift completed.
1591
1592 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1593 }
1594
1595 // Both array are 8 bytes aligned, copy 16 bytes at a time
1596 __ and3(count, 3, G4); // Save
1597 __ srl(count, 2, count);
1598 generate_disjoint_long_copy_core(aligned);
1599 __ mov(G4, count); // restore
1600
1601 // copy 1 element at a time
1602 __ BIND(L_copy_2_bytes);
1603 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1604 __ delayed()->nop();
1605 __ align(OptoLoopAlignment);
1606 __ BIND(L_copy_2_bytes_loop);
1607 __ lduh(from, offset, O3);
1608 __ deccc(count);
1609 __ sth(O3, to, offset);
1610 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1611 __ delayed()->inc(offset, 2);
1612
1613 __ BIND(L_exit);
1614 // O3, O4 are used as temp registers
1615 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1616 __ retl();
1617 __ delayed()->mov(G0, O0); // return 0
1618 return start;
1619 }
1620
1621 //
1622 // Generate stub for disjoint short fill. If "aligned" is true, the
1623 // "to" address is assumed to be heapword aligned.
1624 //
1929 // Also jump over aligned copy after the copy with shift completed.
1930
1931 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1932 L_aligned_copy, L_copy_2_bytes);
1933 }
1934 // copy 4 elements (16 bytes) at a time
1935 __ align(OptoLoopAlignment);
1936 __ BIND(L_aligned_copy);
1937 __ dec(end_from, 16);
1938 __ ldx(end_from, 8, O3);
1939 __ ldx(end_from, 0, O4);
1940 __ dec(end_to, 16);
1941 __ deccc(count, 8);
1942 __ stx(O3, end_to, 8);
1943 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1944 __ delayed()->stx(O4, end_to, 0);
1945 __ inc(count, 8);
1946
1947 // copy 1 element (2 bytes) at a time
1948 __ BIND(L_copy_2_bytes);
1949 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1950 __ delayed()->nop();
1951 __ BIND(L_copy_2_bytes_loop);
1952 __ dec(end_from, 2);
1953 __ dec(end_to, 2);
1954 __ lduh(end_from, 0, O4);
1955 __ deccc(count);
1956 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1957 __ delayed()->sth(O4, end_to, 0);
1958
1959 __ BIND(L_exit);
1960 // O3, O4 are used as temp registers
1961 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1962 __ retl();
1963 __ delayed()->mov(G0, O0); // return 0
1964 return start;
1965 }
1966
1967 //
1968 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1969 // If "aligned" is true, the "from" and "to" addresses are assumed
1970 // to be heapword aligned.
2043 __ sllx(O4, 32, O4);
2044 __ srlx(G4, 32, G3);
2045 __ bset(G3, O4);
2046 __ stx(O4, to, -8);
2047 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2048 __ delayed()->mov(G4, O3);
2049
2050 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2051 __ delayed()->inc(count, 4); // restore 'count'
2052
2053 __ BIND(L_aligned_copy);
2054 }
2055 // copy 4 elements (16 bytes) at a time
2056 __ and3(count, 1, G4); // Save
2057 __ srl(count, 1, count);
2058 generate_disjoint_long_copy_core(aligned);
2059 __ mov(G4, count); // Restore
2060
2061 // copy 1 element at a time
2062 __ BIND(L_copy_4_bytes);
2063 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2064 __ delayed()->nop();
2065 __ BIND(L_copy_4_bytes_loop);
2066 __ ld(from, offset, O3);
2067 __ deccc(count);
2068 __ st(O3, to, offset);
2069 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2070 __ delayed()->inc(offset, 4);
2071 __ BIND(L_exit);
2072 }
2073
2074 //
2075 // Generate stub for disjoint int copy. If "aligned" is true, the
2076 // "from" and "to" addresses are assumed to be heapword aligned.
2077 //
2078 // Arguments for generated stub:
2079 // from: O0
2080 // to: O1
2081 // count: O2 treated as signed
2082 //
2083 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2084 __ align(CodeEntryAlignment);
2176 __ delayed()->mov(O5, O3);
2177
2178 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2179 __ delayed()->inc(count, 4);
2180
2181 // copy 4 elements (16 bytes) at a time
2182 __ align(OptoLoopAlignment);
2183 __ BIND(L_aligned_copy);
2184 __ dec(end_from, 16);
2185 __ ldx(end_from, 8, O3);
2186 __ ldx(end_from, 0, O4);
2187 __ dec(end_to, 16);
2188 __ deccc(count, 4);
2189 __ stx(O3, end_to, 8);
2190 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2191 __ delayed()->stx(O4, end_to, 0);
2192 __ inc(count, 4);
2193
2194 // copy 1 element (4 bytes) at a time
2195 __ BIND(L_copy_4_bytes);
2196 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2197 __ delayed()->nop();
2198 __ BIND(L_copy_4_bytes_loop);
2199 __ dec(end_from, 4);
2200 __ dec(end_to, 4);
2201 __ ld(end_from, 0, O4);
2202 __ deccc(count);
2203 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2204 __ delayed()->st(O4, end_to, 0);
2205 __ BIND(L_exit);
2206 }
2207
2208 //
2209 // Generate stub for conjoint int copy. If "aligned" is true, the
2210 // "from" and "to" addresses are assumed to be heapword aligned.
2211 //
2212 // Arguments for generated stub:
2213 // from: O0
2214 // to: O1
2215 // count: O2 treated as signed
2216 //
2217 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2559 Register temp,
2560 Label& L_success) {
2561 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2562
2563 BLOCK_COMMENT("type_check:");
2564
2565 Label L_miss, L_pop_to_miss;
2566
2567 assert_clean_int(super_check_offset, temp);
2568
2569 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2570 &L_success, &L_miss, NULL,
2571 super_check_offset);
2572
2573 BLOCK_COMMENT("type_check_slow_path:");
2574 __ save_frame(0);
2575 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2576 super_klass->after_save(),
2577 L0, L1, L2, L4,
2578 NULL, &L_pop_to_miss);
2579 __ ba(false, L_success);
2580 __ delayed()->restore();
2581
2582 __ bind(L_pop_to_miss);
2583 __ restore();
2584
2585 // Fall through on failure!
2586 __ BIND(L_miss);
2587 }
2588
2589
2590 // Generate stub for checked oop copy.
2591 //
2592 // Arguments for generated stub:
2593 // from: O0
2594 // to: O1
2595 // count: O2 treated as signed
2596 // ckoff: O3 (super_check_offset)
2597 // ckval: O4 (super_klass)
2598 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2599 //
2656 __ delayed()->set(0, O0); // return 0 on (trivial) success
2657
2658 // ======== begin loop ========
2659 // (Loop is rotated; its entry is load_element.)
2660 // Loop variables:
2661 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2662 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2663 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2664 __ align(OptoLoopAlignment);
2665
2666 __ BIND(store_element);
2667 __ deccc(G1_remain); // decrement the count
2668 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2669 __ inc(O5_offset, heapOopSize); // step to next offset
2670 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2671 __ delayed()->set(0, O0); // return 0 on success (contract: O0 == 0 means all elements copied)
2672
2673 // ======== loop entry is here ========
2674 __ BIND(load_element);
2675 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2676 __ br_null(G3_oop, true, Assembler::pt, store_element);
2677 __ delayed()->nop();
2678
2679 __ load_klass(G3_oop, G4_klass); // query the object klass
2680
2681 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2682 // branch to this on success:
2683 store_element);
2684 // ======== end loop ========
2685
2686 // It was a real error; we must depend on the caller to finish the job.
2687 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2688 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2689 // and report their number to the caller.
2690 __ BIND(fail);
2691 __ subcc(O2_count, G1_remain, O2_count);
2692 __ brx(Assembler::zero, false, Assembler::pt, done);
2693 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2694
2695 __ BIND(do_card_marks);
2696 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2697
2879 __ delayed()->tst(dst_pos);
2880 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2881
2882 // if (length < 0) return -1;
2883 __ delayed()->tst(length);
2884 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2885
2886 BLOCK_COMMENT("arraycopy argument klass checks");
2887 // get src->klass()
2888 if (UseCompressedOops) {
2889 __ delayed()->nop(); // ??? not good
2890 __ load_klass(src, G3_src_klass);
2891 } else {
2892 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2893 }
2894
2895 #ifdef ASSERT
2896 // assert(src->klass() != NULL);
2897 BLOCK_COMMENT("assert klasses not null");
2898 { Label L_a, L_b;
2899 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2900 __ delayed()->nop();
2901 __ bind(L_a);
2902 __ stop("broken null klass");
2903 __ bind(L_b);
2904 __ load_klass(dst, G4_dst_klass);
2905 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2906 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2907 BLOCK_COMMENT("assert done");
2908 }
2909 #endif
2910
2911 // Load layout helper
2912 //
2913 // |array_tag| | header_size | element_type | |log2_element_size|
2914 // 32 30 24 16 8 2 0
2915 //
2916 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2917 //
2918
2919 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2920 Klass::layout_helper_offset_in_bytes();
2921
2922 // Load 32-bits signed value. Use br() instruction with it to check icc.
2923 __ lduw(G3_src_klass, lh_offset, G5_lh);
2924
2925 if (UseCompressedOops) {
2926 __ load_klass(dst, G4_dst_klass);
2927 }
2928 // Handle objArrays completely differently...
2929 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2930 __ set(objArray_lh, O5_temp);
2931 __ cmp(G5_lh, O5_temp);
2932 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2933 if (UseCompressedOops) {
2934 __ delayed()->nop();
2935 } else {
2936 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2937 }
2938
2939 // if (src->klass() != dst->klass()) return -1;
2940 __ cmp(G3_src_klass, G4_dst_klass);
2941 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
2942 __ delayed()->nop();
2943
2944 // if (!src->is_Array()) return -1;
2945 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2946 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2947
2948 // At this point, it is known to be a typeArray (array_tag 0x3).
2949 #ifdef ASSERT
2950 __ delayed()->nop();
2951 { Label L;
2952 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2953 __ set(lh_prim_tag_in_place, O5_temp);
2954 __ cmp(G5_lh, O5_temp);
2955 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2956 __ delayed()->nop();
2957 __ stop("must be a primitive array");
2958 __ bind(L);
2959 }
2960 #else
2961 __ delayed(); // match next insn to prev branch
2962 #endif
2990 BLOCK_COMMENT("scale indexes to element size");
2991 __ sll_ptr(src_pos, G3_elsize, src_pos);
2992 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2993 __ add(src, src_pos, from); // src_addr
2994 __ add(dst, dst_pos, to); // dst_addr
2995
2996 BLOCK_COMMENT("choose copy loop based on element size");
2997 __ cmp(G3_elsize, 0);
2998 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2999 __ delayed()->signx(length, count); // length
3000
3001 __ cmp(G3_elsize, LogBytesPerShort);
3002 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
3003 __ delayed()->signx(length, count); // length
3004
3005 __ cmp(G3_elsize, LogBytesPerInt);
3006 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
3007 __ delayed()->signx(length, count); // length
3008 #ifdef ASSERT
3009 { Label L;
3010 __ cmp(G3_elsize, LogBytesPerLong);
3011 __ br(Assembler::equal, false, Assembler::pt, L);
3012 __ delayed()->nop();
3013 __ stop("must be long copy, but elsize is wrong");
3014 __ bind(L);
3015 }
3016 #endif
3017 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
3018 __ delayed()->signx(length, count); // length
3019
3020 // objArrayKlass
3021 __ BIND(L_objArray);
3022 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3023
3024 Label L_plain_copy, L_checkcast_copy;
3025 // test array classes for subtyping
3026 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3027 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3028 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3029
3030 // Identically typed arrays can be copied without element-wise checks.
3031 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3032 O5_temp, G5_lh, L_failed);
|
133 // the code in frame::entry_frame_call_wrapper()
134
135 const Argument link = Argument(0, false); // used only for GC
136 const Argument result = Argument(1, false);
137 const Argument result_type = Argument(2, false);
138 const Argument method = Argument(3, false);
139 const Argument entry_point = Argument(4, false);
140 const Argument parameters = Argument(5, false);
141 const Argument parameter_size = Argument(6, false);
142 const Argument thread = Argument(7, false);
143
144 // setup thread register
145 __ ld_ptr(thread.as_address(), G2_thread);
146 __ reinit_heapbase();
147
148 #ifdef ASSERT
149 // make sure we have no pending exceptions
150 { const Register t = G3_scratch;
151 Label L;
152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 __ br_null_short(t, Assembler::pt, L);
154 __ stop("StubRoutines::call_stub: entered with pending exception");
155 __ bind(L);
156 }
157 #endif
158
159 // create activation frame & allocate space for parameters
160 { const Register t = G3_scratch;
161 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
162 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
163 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
164 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
165 __ neg(t); // negate so it can be used with save
166 __ save(SP, t, SP); // setup new frame
167 }
168
169 // +---------------+ <--- sp + 0
170 // | |
171 // . reg save area .
172 // | |
173 // +---------------+ <--- sp + 0x40
189 // . extra 7 slots .
190 // | |
191 // +---------------+ <--- fp + 0x5c
192 // | param. size |
193 // +---------------+ <--- fp + 0x60
194 // | thread |
195 // +---------------+
196 // | |
197
198 // pass parameters if any
199 BLOCK_COMMENT("pass parameters if any");
200 { const Register src = parameters.as_in().as_register();
201 const Register dst = Lentry_args;
202 const Register tmp = G3_scratch;
203 const Register cnt = G4_scratch;
204
205 // test if any parameters & setup of Lentry_args
206 Label exit;
207 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
208 __ add( FP, STACK_BIAS, dst );
209 __ cmp_zero_and_br(Assembler::zero, cnt, exit);
210 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
211
212 // copy parameters if any
213 Label loop;
214 __ BIND(loop);
215 // Store parameter value
216 __ ld_ptr(src, 0, tmp);
217 __ add(src, BytesPerWord, src);
218 __ st_ptr(tmp, dst, 0);
219 __ deccc(cnt);
220 __ br(Assembler::greater, false, Assembler::pt, loop);
221 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
222
223 // done
224 __ BIND(exit);
225 }
226
227 // setup parameters, method & call Java function
228 #ifdef ASSERT
229 // layout_activation_impl checks it's notion of saved SP against
263 // store result depending on type
264 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
265 // is treated as T_INT)
266 { const Register addr = result .as_in().as_register();
267 const Register type = result_type.as_in().as_register();
268 Label is_long, is_float, is_double, is_object, exit;
269 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
270 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
271 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
272 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
273 __ delayed()->nop();
274
275 // store int result
276 __ st(O0, addr, G0);
277
278 __ BIND(exit);
279 __ ret();
280 __ delayed()->restore();
281
282 __ BIND(is_object);
283 __ ba(exit);
284 __ delayed()->st_ptr(O0, addr, G0);
285
286 __ BIND(is_float);
287 __ ba(exit);
288 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
289
290 __ BIND(is_double);
291 __ ba(exit);
292 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
293
294 __ BIND(is_long);
295 #ifdef _LP64
296 __ ba(exit);
297 __ delayed()->st_long(O0, addr, G0); // store entire long
298 #else
299 #if defined(COMPILER2)
300 // All return values are where we want them, except for Longs. C2 returns
301 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
302 // Since the interpreter will return longs in G1 and O0/O1 in the 32bit
303 // build we simply always use G1.
304 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
305 // do this here. Unfortunately if we did a rethrow we'd see an machepilog node
306 // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
307
308 __ ba(exit);
309 __ delayed()->stx(G1, addr, G0); // store entire long
310 #else
311 __ st(O1, addr, BytesPerInt);
312 __ ba(exit);
313 __ delayed()->st(O0, addr, G0);
314 #endif /* COMPILER2 */
315 #endif /* _LP64 */
316 }
317 return start;
318 }
319
320
321 //----------------------------------------------------------------------------------------------------
322 // Return point for a Java call if there's an exception thrown in Java code.
323 // The exception is caught and transformed into a pending exception stored in
324 // JavaThread that can be tested from within the VM.
325 //
326 // Oexception: exception oop
327
328 address generate_catch_exception() {
329 StubCodeMark mark(this, "StubRoutines", "catch_exception");
330
331 address start = __ pc();
332 // verify that thread corresponds
363 //
364 // Contract with Java-level exception handler: O0 = exception
365 // O1 = throwing pc
366
  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.
    //
    // On exit we jump to the Java-level exception handler with
    // Oexception (= O0) holding the exception oop and Oissuing_pc
    // (= O1) holding the throwing pc, and the thread's pending
    // exception field cleared (see contract comment above).

    // Gtemp serves double duty: scratch for the ASSERT check below,
    // then holds the computed handler address for the final jump.
    const Register& handler_reg = Gtemp;

    // Address of JavaThread::_pending_exception for the current thread.
    Address exception_addr(G2_thread, Thread::pending_exception_offset());

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull_short(Gtemp, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    // Open a fresh register window so the leaf call cannot clobber the
    // caller's out registers (O7 in particular).
    __ save_frame(0);             // compensates for compiler weakness
    // O7->after_save() is the caller's O7 seen from the new window (I7);
    // adjust by pc_return_offset to turn the return address into the
    // throwing pc expected by the handler lookup.
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
    __ mov(O0, handler_reg);      // handler address is the leaf call's result
    __ restore();                 // compensates for compiler weakness

    // Reload the exception oop (the leaf call may have clobbered O-regs)
    // and recompute the throwing pc into the register the handler reads.
    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull_short(Oexception, Assembler::pt, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    // (done in the branch delay slot, so it executes before the handler)
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }
418
419
420 //------------------------------------------------------------------------------------------------------------------------
421 // Continuation point for throwing of implicit exceptions that are not handled in
422 // the current activation. Fabricates an exception oop and initiates normal
423 // exception dispatching in this frame. Only callee-saved registers are preserved
424 // (through the normal register window / RegisterMap handling).
425 // If the compiler needs all registers to be preserved between the fault
426 // point and the exception handler then it must assume responsibility for that in
480 if (arg2 != noreg) {
481 __ mov(arg2, O2);
482 }
483 // do the call
484 BLOCK_COMMENT("call runtime_entry");
485 __ call(runtime_entry, relocInfo::runtime_call_type);
486 if (!VerifyThread)
487 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
488 else
489 __ delayed()->nop(); // (thread already passed)
490 __ restore_thread(noreg);
491 __ reset_last_Java_frame();
492
493 // check for pending exceptions. use Gtemp as scratch register.
494 #ifdef ASSERT
495 Label L;
496
497 Address exception_addr(G2_thread, Thread::pending_exception_offset());
498 Register scratch_reg = Gtemp;
499 __ ld_ptr(exception_addr, scratch_reg);
500 __ br_notnull_short(scratch_reg, Assembler::pt, L);
501 __ should_not_reach_here();
502 __ bind(L);
503 #endif // ASSERT
504 BLOCK_COMMENT("call forward_exception_entry");
505 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
506 // we use O7 linkage so that forward_exception_entry has the issuing PC
507 __ delayed()->restore();
508
509 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
510 return stub->entry_point();
511 }
512
513 #undef __
514 #define __ _masm->
515
516
517 // Generate a routine that sets all the registers so we
518 // can tell if the stop routine prints them correctly.
519 address generate_test_stop() {
520 StubCodeMark mark(this, "StubRoutines", "test_stop");
592 if (mark_oop_reg == noreg) {
593 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
594 __ set((intptr_t)lock_ptr, lock_ptr_reg);
595 } else {
596 assert(scratch_reg != noreg, "just checking");
597 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
598 __ set((intptr_t)lock_ptr, lock_ptr_reg);
599 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
600 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
601 }
602 }
603
604 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
605
606 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
607 __ set(StubRoutines::Sparc::locked, lock_reg);
608 // Initialize yield counter
609 __ mov(G0,yield_reg);
610
611 __ BIND(retry);
612 __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
613
614 // This code can only be called from inside the VM, this
615 // stub is only invoked from Atomic::add(). We do not
616 // want to use call_VM, because _last_java_sp and such
617 // must already be set.
618 //
619 // Save the regs and make space for a C call
620 __ save(SP, -96, SP);
621 __ save_all_globals_into_locals();
622 BLOCK_COMMENT("call os::naked_sleep");
623 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
624 __ delayed()->nop();
625 __ restore_globals_from_locals();
626 __ restore();
627 // reset the counter
628 __ mov(G0,yield_reg);
629
630 __ BIND(dontyield);
631
632 // try to get lock
652 // dest: O1
653 //
654 // Results:
655 //
656 // O0: the value previously stored in dest
657 //
658 address generate_atomic_xchg() {
659 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
660 address start = __ pc();
661
662 if (UseCASForSwap) {
663 // Use CAS instead of swap, just in case the MP hardware
664 // prefers to work with just one kind of synch. instruction.
665 Label retry;
666 __ BIND(retry);
667 __ mov(O0, O3); // scratch copy of exchange value
668 __ ld(O1, 0, O2); // observe the previous value
669 // try to replace O2 with O3
670 __ cas_under_lock(O1, O2, O3,
671 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
672 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
673
674 __ retl(false);
675 __ delayed()->mov(O2, O0); // report previous value to caller
676
677 } else {
678 if (VM_Version::v9_instructions_work()) {
679 __ retl(false);
680 __ delayed()->swap(O1, 0, O0);
681 } else {
682 const Register& lock_reg = O2;
683 const Register& lock_ptr_reg = O3;
684 const Register& yield_reg = O4;
685
686 Label retry;
687 Label dontyield;
688
689 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
690 // got the lock, do the swap
691 __ swap(O1, 0, O0);
692
774 //
775 // Results:
776 //
777 // O0: the new value stored in dest
778 //
779 // Overwrites (v9): O3
780 // Overwrites (v8): O3,O4,O5
781 //
782 address generate_atomic_add() {
783 StubCodeMark mark(this, "StubRoutines", "atomic_add");
784 address start = __ pc();
785 __ BIND(_atomic_add_stub);
786
787 if (VM_Version::v9_instructions_work()) {
788 Label(retry);
789 __ BIND(retry);
790
791 __ lduw(O1, 0, O2);
792 __ add(O0, O2, O3);
793 __ cas(O1, O2, O3);
794 __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
795 __ retl(false);
796 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
797 } else {
798 const Register& lock_reg = O2;
799 const Register& lock_ptr_reg = O3;
800 const Register& value_reg = O4;
801 const Register& yield_reg = O5;
802
803 Label(retry);
804 Label(dontyield);
805
806 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
807 // got lock, do the increment
808 __ ld(O1, 0, value_reg);
809 __ add(O0, value_reg, value_reg);
810 __ st(value_reg, O1, 0);
811
812 // %%% only for RMO and PSO
813 __ membar(Assembler::StoreStore);
814
1342 if (!aligned)
1343 #endif
1344 {
1345 // Copy with shift 16 bytes per iteration if arrays do not have
1346 // the same alignment mod 8, otherwise fall through to the next
1347 // code for aligned copy.
1348 // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
1349 // Also jump over aligned copy after the copy with shift completed.
1350
1351 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1352 }
1353
1354 // Both array are 8 bytes aligned, copy 16 bytes at a time
1355 __ and3(count, 7, G4); // Save count
1356 __ srl(count, 3, count);
1357 generate_disjoint_long_copy_core(aligned);
1358 __ mov(G4, count); // Restore count
1359
1360 // copy tailing bytes
1361 __ BIND(L_copy_byte);
1362 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1363 __ align(OptoLoopAlignment);
1364 __ BIND(L_copy_byte_loop);
1365 __ ldub(from, offset, O3);
1366 __ deccc(count);
1367 __ stb(O3, to, offset);
1368 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1369 __ delayed()->inc(offset);
1370
1371 __ BIND(L_exit);
1372 // O3, O4 are used as temp registers
1373 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1374 __ retl();
1375 __ delayed()->mov(G0, O0); // return 0
1376 return start;
1377 }
1378
1379 //
1380 // Generate stub for conjoint byte copy. If "aligned" is true, the
1381 // "from" and "to" addresses are assumed to be heapword aligned.
1382 //
1453 // Also jump over aligned copy after the copy with shift completed.
1454
1455 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1456 L_aligned_copy, L_copy_byte);
1457 }
1458 // copy 4 elements (16 bytes) at a time
1459 __ align(OptoLoopAlignment);
1460 __ BIND(L_aligned_copy);
1461 __ dec(end_from, 16);
1462 __ ldx(end_from, 8, O3);
1463 __ ldx(end_from, 0, O4);
1464 __ dec(end_to, 16);
1465 __ deccc(count, 16);
1466 __ stx(O3, end_to, 8);
1467 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1468 __ delayed()->stx(O4, end_to, 0);
1469 __ inc(count, 16);
1470
1471 // copy 1 element (2 bytes) at a time
1472 __ BIND(L_copy_byte);
1473 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1474 __ align(OptoLoopAlignment);
1475 __ BIND(L_copy_byte_loop);
1476 __ dec(end_from);
1477 __ dec(end_to);
1478 __ ldub(end_from, 0, O4);
1479 __ deccc(count);
1480 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1481 __ delayed()->stb(O4, end_to, 0);
1482
1483 __ BIND(L_exit);
1484 // O3, O4 are used as temp registers
1485 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1486 __ retl();
1487 __ delayed()->mov(G0, O0); // return 0
1488 return start;
1489 }
1490
1491 //
1492 // Generate stub for disjoint short copy. If "aligned" is true, the
1493 // "from" and "to" addresses are assumed to be heapword aligned.
1570 if (!aligned)
1571 #endif
1572 {
1573 // Copy with shift 16 bytes per iteration if arrays do not have
1574 // the same alignment mod 8, otherwise fall through to the next
1575 // code for aligned copy.
1576 // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
1577 // Also jump over aligned copy after the copy with shift completed.
1578
1579 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1580 }
1581
1582 // Both array are 8 bytes aligned, copy 16 bytes at a time
1583 __ and3(count, 3, G4); // Save
1584 __ srl(count, 2, count);
1585 generate_disjoint_long_copy_core(aligned);
1586 __ mov(G4, count); // restore
1587
1588 // copy 1 element at a time
1589 __ BIND(L_copy_2_bytes);
1590 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1591 __ align(OptoLoopAlignment);
1592 __ BIND(L_copy_2_bytes_loop);
1593 __ lduh(from, offset, O3);
1594 __ deccc(count);
1595 __ sth(O3, to, offset);
1596 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1597 __ delayed()->inc(offset, 2);
1598
1599 __ BIND(L_exit);
1600 // O3, O4 are used as temp registers
1601 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1602 __ retl();
1603 __ delayed()->mov(G0, O0); // return 0
1604 return start;
1605 }
1606
1607 //
1608 // Generate stub for disjoint short fill. If "aligned" is true, the
1609 // "to" address is assumed to be heapword aligned.
1610 //
1915 // Also jump over aligned copy after the copy with shift completed.
1916
1917 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1918 L_aligned_copy, L_copy_2_bytes);
1919 }
1920 // copy 4 elements (16 bytes) at a time
1921 __ align(OptoLoopAlignment);
1922 __ BIND(L_aligned_copy);
1923 __ dec(end_from, 16);
1924 __ ldx(end_from, 8, O3);
1925 __ ldx(end_from, 0, O4);
1926 __ dec(end_to, 16);
1927 __ deccc(count, 8);
1928 __ stx(O3, end_to, 8);
1929 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1930 __ delayed()->stx(O4, end_to, 0);
1931 __ inc(count, 8);
1932
1933 // copy 1 element (2 bytes) at a time
1934 __ BIND(L_copy_2_bytes);
1935 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
1936 __ BIND(L_copy_2_bytes_loop);
1937 __ dec(end_from, 2);
1938 __ dec(end_to, 2);
1939 __ lduh(end_from, 0, O4);
1940 __ deccc(count);
1941 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1942 __ delayed()->sth(O4, end_to, 0);
1943
1944 __ BIND(L_exit);
1945 // O3, O4 are used as temp registers
1946 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1947 __ retl();
1948 __ delayed()->mov(G0, O0); // return 0
1949 return start;
1950 }
1951
1952 //
1953 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1954 // If "aligned" is true, the "from" and "to" addresses are assumed
1955 // to be heapword aligned.
2028 __ sllx(O4, 32, O4);
2029 __ srlx(G4, 32, G3);
2030 __ bset(G3, O4);
2031 __ stx(O4, to, -8);
2032 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2033 __ delayed()->mov(G4, O3);
2034
2035 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2036 __ delayed()->inc(count, 4); // restore 'count'
2037
2038 __ BIND(L_aligned_copy);
2039 }
2040 // copy 4 elements (16 bytes) at a time
2041 __ and3(count, 1, G4); // Save
2042 __ srl(count, 1, count);
2043 generate_disjoint_long_copy_core(aligned);
2044 __ mov(G4, count); // Restore
2045
2046 // copy 1 element at a time
2047 __ BIND(L_copy_4_bytes);
2048 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2049 __ BIND(L_copy_4_bytes_loop);
2050 __ ld(from, offset, O3);
2051 __ deccc(count);
2052 __ st(O3, to, offset);
2053 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2054 __ delayed()->inc(offset, 4);
2055 __ BIND(L_exit);
2056 }
2057
2058 //
2059 // Generate stub for disjoint int copy. If "aligned" is true, the
2060 // "from" and "to" addresses are assumed to be heapword aligned.
2061 //
2062 // Arguments for generated stub:
2063 // from: O0
2064 // to: O1
2065 // count: O2 treated as signed
2066 //
2067 address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2068 __ align(CodeEntryAlignment);
2160 __ delayed()->mov(O5, O3);
2161
2162 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2163 __ delayed()->inc(count, 4);
2164
2165 // copy 4 elements (16 bytes) at a time
2166 __ align(OptoLoopAlignment);
2167 __ BIND(L_aligned_copy);
2168 __ dec(end_from, 16);
2169 __ ldx(end_from, 8, O3);
2170 __ ldx(end_from, 0, O4);
2171 __ dec(end_to, 16);
2172 __ deccc(count, 4);
2173 __ stx(O3, end_to, 8);
2174 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2175 __ delayed()->stx(O4, end_to, 0);
2176 __ inc(count, 4);
2177
2178 // copy 1 element (4 bytes) at a time
2179 __ BIND(L_copy_4_bytes);
2180 __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
2181 __ BIND(L_copy_4_bytes_loop);
2182 __ dec(end_from, 4);
2183 __ dec(end_to, 4);
2184 __ ld(end_from, 0, O4);
2185 __ deccc(count);
2186 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2187 __ delayed()->st(O4, end_to, 0);
2188 __ BIND(L_exit);
2189 }
2190
2191 //
2192 // Generate stub for conjoint int copy. If "aligned" is true, the
2193 // "from" and "to" addresses are assumed to be heapword aligned.
2194 //
2195 // Arguments for generated stub:
2196 // from: O0
2197 // to: O1
2198 // count: O2 treated as signed
2199 //
2200 address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2542 Register temp,
2543 Label& L_success) {
2544 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2545
2546 BLOCK_COMMENT("type_check:");
2547
2548 Label L_miss, L_pop_to_miss;
2549
2550 assert_clean_int(super_check_offset, temp);
2551
2552 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2553 &L_success, &L_miss, NULL,
2554 super_check_offset);
2555
2556 BLOCK_COMMENT("type_check_slow_path:");
2557 __ save_frame(0);
2558 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2559 super_klass->after_save(),
2560 L0, L1, L2, L4,
2561 NULL, &L_pop_to_miss);
2562 __ ba(L_success);
2563 __ delayed()->restore();
2564
2565 __ bind(L_pop_to_miss);
2566 __ restore();
2567
2568 // Fall through on failure!
2569 __ BIND(L_miss);
2570 }
2571
2572
2573 // Generate stub for checked oop copy.
2574 //
2575 // Arguments for generated stub:
2576 // from: O0
2577 // to: O1
2578 // count: O2 treated as signed
2579 // ckoff: O3 (super_check_offset)
2580 // ckval: O4 (super_klass)
2581 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2582 //
2639 __ delayed()->set(0, O0); // return 0 on (trivial) success
2640
2641 // ======== begin loop ========
2642 // (Loop is rotated; its entry is load_element.)
2643 // Loop variables:
2644 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2645 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2646 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2647 __ align(OptoLoopAlignment);
2648
2649 __ BIND(store_element);
2650 __ deccc(G1_remain); // decrement the count
2651 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2652 __ inc(O5_offset, heapOopSize); // step to next offset
2653 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2654 __ delayed()->set(0, O0); // return -1 on success
2655
2656 // ======== loop entry is here ========
2657 __ BIND(load_element);
2658 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2659 __ br_null_short(G3_oop, Assembler::pt, store_element);
2660
2661 __ load_klass(G3_oop, G4_klass); // query the object klass
2662
2663 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2664 // branch to this on success:
2665 store_element);
2666 // ======== end loop ========
2667
2668 // It was a real error; we must depend on the caller to finish the job.
2669 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2670 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2671 // and report their number to the caller.
2672 __ BIND(fail);
2673 __ subcc(O2_count, G1_remain, O2_count);
2674 __ brx(Assembler::zero, false, Assembler::pt, done);
2675 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2676
2677 __ BIND(do_card_marks);
2678 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2679
2861 __ delayed()->tst(dst_pos);
2862 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2863
2864 // if (length < 0) return -1;
2865 __ delayed()->tst(length);
2866 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2867
2868 BLOCK_COMMENT("arraycopy argument klass checks");
2869 // get src->klass()
2870 if (UseCompressedOops) {
2871 __ delayed()->nop(); // ??? not good
2872 __ load_klass(src, G3_src_klass);
2873 } else {
2874 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2875 }
2876
2877 #ifdef ASSERT
2878 // assert(src->klass() != NULL);
2879 BLOCK_COMMENT("assert klasses not null");
2880 { Label L_a, L_b;
2881 __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
2882 __ bind(L_a);
2883 __ stop("broken null klass");
2884 __ bind(L_b);
2885 __ load_klass(dst, G4_dst_klass);
2886 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2887 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2888 BLOCK_COMMENT("assert done");
2889 }
2890 #endif
2891
2892 // Load layout helper
2893 //
2894 // |array_tag| | header_size | element_type | |log2_element_size|
2895 // 32 30 24 16 8 2 0
2896 //
2897 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2898 //
2899
2900 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2901 Klass::layout_helper_offset_in_bytes();
2902
2903 // Load 32-bits signed value. Use br() instruction with it to check icc.
2904 __ lduw(G3_src_klass, lh_offset, G5_lh);
2905
2906 if (UseCompressedOops) {
2907 __ load_klass(dst, G4_dst_klass);
2908 }
2909 // Handle objArrays completely differently...
2910 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2911 __ set(objArray_lh, O5_temp);
2912 __ cmp(G5_lh, O5_temp);
2913 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2914 if (UseCompressedOops) {
2915 __ delayed()->nop();
2916 } else {
2917 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2918 }
2919
2920 // if (src->klass() != dst->klass()) return -1;
2921 __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
2922
2923 // if (!src->is_Array()) return -1;
2924 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2925 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2926
2927 // At this point, it is known to be a typeArray (array_tag 0x3).
2928 #ifdef ASSERT
2929 __ delayed()->nop();
2930 { Label L;
2931 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2932 __ set(lh_prim_tag_in_place, O5_temp);
2933 __ cmp(G5_lh, O5_temp);
2934 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2935 __ delayed()->nop();
2936 __ stop("must be a primitive array");
2937 __ bind(L);
2938 }
2939 #else
2940 __ delayed(); // match next insn to prev branch
2941 #endif
2969 BLOCK_COMMENT("scale indexes to element size");
2970 __ sll_ptr(src_pos, G3_elsize, src_pos);
2971 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2972 __ add(src, src_pos, from); // src_addr
2973 __ add(dst, dst_pos, to); // dst_addr
2974
2975 BLOCK_COMMENT("choose copy loop based on element size");
2976 __ cmp(G3_elsize, 0);
2977 __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2978 __ delayed()->signx(length, count); // length
2979
2980 __ cmp(G3_elsize, LogBytesPerShort);
2981 __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2982 __ delayed()->signx(length, count); // length
2983
2984 __ cmp(G3_elsize, LogBytesPerInt);
2985 __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2986 __ delayed()->signx(length, count); // length
2987 #ifdef ASSERT
2988 { Label L;
2989 __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
2990 __ stop("must be long copy, but elsize is wrong");
2991 __ bind(L);
2992 }
2993 #endif
2994 __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2995 __ delayed()->signx(length, count); // length
2996
2997 // objArrayKlass
2998 __ BIND(L_objArray);
2999 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
3000
3001 Label L_plain_copy, L_checkcast_copy;
3002 // test array classes for subtyping
3003 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3004 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3005 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3006
3007 // Identically typed arrays can be copied without element-wise checks.
3008 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3009 O5_temp, G5_lh, L_failed);
|