1 /*
2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
110
111 static void restore_result_registers(MacroAssembler* masm);
112 };
113
114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
115 // Record volatile registers as callee-save values in an OopMap so their save locations will be
116 // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
117 // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
118 // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
119 // (as the stub's I's) when the runtime routine called by the stub creates its frame.
120 int i;
121 // Always make the frame size 16 byte aligned.
122 int frame_size = round_to(additional_frame_words + register_save_size, 16);
123 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
124 int frame_size_in_slots = frame_size / sizeof(jint);
125 // CodeBlob frame size is in words.
126 *total_frame_words = frame_size / wordSize;
127 // OopMap* map = new OopMap(*total_frame_words, 0);
128 OopMap* map = new OopMap(frame_size_in_slots, 0);
129
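// Worked example (editorial sketch with hypothetical numbers, not values
// taken from this file): if additional_frame_words + register_save_size
// rounds up to a frame_size of 416 bytes, then
//   frame_size_in_slots = 416 / sizeof(jint) = 104 OopMap slots
//   *total_frame_words  = 416 / wordSize     =  52 words (LP64)
// The same byte count is expressed in three units because OopMaps count
// 32-bit slots while CodeBlobs count machine words.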
130 #if !defined(_LP64)
131
132 // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
133 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
134 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
135 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
136 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
137 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
138 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
139 #endif /* _LP64 */
140
141 __ save(SP, -frame_size, SP);
142
143 #ifndef _LP64
144 // Reload the 64-bit Oregs. Although they are now Iregs we load them
145 // to Oregs here to avoid interrupts cutting off their heads.
146
147 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
148 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
149 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
150 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
151 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
152 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
153
154 __ stx(O0, SP, o0_offset+STACK_BIAS);
155 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
156
157 __ stx(O1, SP, o1_offset+STACK_BIAS);
158
159 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
160
161 __ stx(O2, SP, o2_offset+STACK_BIAS);
162 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
163
164 __ stx(O3, SP, o3_offset+STACK_BIAS);
165 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
166
167 __ stx(O4, SP, o4_offset+STACK_BIAS);
168 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
169
170 __ stx(O5, SP, o5_offset+STACK_BIAS);
171 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
172 #endif /* _LP64 */
173
174
175 #ifdef _LP64
176 int debug_offset = 0;
177 #else
178 int debug_offset = 4;
179 #endif
180 // Save the G's
181 __ stx(G1, SP, g1_offset+STACK_BIAS);
182 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
183
184 __ stx(G3, SP, g3_offset+STACK_BIAS);
185 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
186
187 __ stx(G4, SP, g4_offset+STACK_BIAS);
188 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
189
190 __ stx(G5, SP, g5_offset+STACK_BIAS);
191 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
192
193 // This is really a waste but we'll keep things as they were for now
194 if (true) {
195 #ifndef _LP64
196 map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
197 map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
198 map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
199 map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
200 map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
201 map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
202 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
203 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
204 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
205 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
206 #endif /* _LP64 */
207 }
208
209
210 // Save the flags
211 __ rdccr( G5 );
212 __ stx(G5, SP, ccr_offset+STACK_BIAS);
213 __ stxfsr(SP, fsr_offset+STACK_BIAS);
214
215 // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
216 int offset = d00_offset;
217 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
218 FloatRegister f = as_FloatRegister(i);
219 __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
220 // Record as callee saved both halves of double registers (2 float registers).
221 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
222 map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
223 offset += sizeof(double);
224 }
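// Illustration of the loop above (offsets are relative, for exposition
// only): D0 is stored at d00_offset, D2 at d00_offset + 8, and so on, while
// the OopMap records two 4-byte slots per double so each float half (e.g.
// F0 and F1 within D0) can be located individually during deoptimization.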
225
226 // And we're done.
233 // saved.
234 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
235
236 // Restore all the FP registers
237 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
238 __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
239 }
240
241 __ ldx(SP, ccr_offset+STACK_BIAS, G1);
242 __ wrccr (G1) ;
243
244 // Restore the G's
245 // Note that G2 (AKA GThread) must be saved and restored separately.
246 // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
247
248 __ ldx(SP, g1_offset+STACK_BIAS, G1);
249 __ ldx(SP, g3_offset+STACK_BIAS, G3);
250 __ ldx(SP, g4_offset+STACK_BIAS, G4);
251 __ ldx(SP, g5_offset+STACK_BIAS, G5);
252
253
254 #if !defined(_LP64)
255 // Restore the 64-bit O's.
256 __ ldx(SP, o0_offset+STACK_BIAS, O0);
257 __ ldx(SP, o1_offset+STACK_BIAS, O1);
258 __ ldx(SP, o2_offset+STACK_BIAS, O2);
259 __ ldx(SP, o3_offset+STACK_BIAS, O3);
260 __ ldx(SP, o4_offset+STACK_BIAS, O4);
261 __ ldx(SP, o5_offset+STACK_BIAS, O5);
262
263 // And temporarily place them in TLS
264
265 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
266 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
267 __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
268 __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
269 __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
270 __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
271 #endif /* _LP64 */
272
273 // Restore flags
274
275 __ ldxfsr(SP, fsr_offset+STACK_BIAS);
276
277 __ restore();
278
279 #if !defined(_LP64)
280 // Now reload the 64-bit Oregs after we've restored the window.
281 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
282 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
283 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
284 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
285 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
286 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
287 #endif /* _LP64 */
288
289 }
290
291 // Pop the current frame and restore the registers that might be holding
292 // a result.
293 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
294
295 #if !defined(_LP64)
296 // 32bit build returns longs in G1
297 __ ldx(SP, g1_offset+STACK_BIAS, G1);
298
299 // Retrieve the 64-bit O's.
300 __ ldx(SP, o0_offset+STACK_BIAS, O0);
301 __ ldx(SP, o1_offset+STACK_BIAS, O1);
302 // and save to TLS
303 __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
304 __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
305 #endif /* _LP64 */
306
307 __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
308
309 __ restore();
310
311 #if !defined(_LP64)
312 // Now reload the 64-bit Oregs after we've restored the window.
313 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
314 __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
315 #endif /* _LP64 */
316
317 }
318
319 // Is the vector's size (in bytes) bigger than the size saved by default?
320 // 8-byte FP registers are saved by default on SPARC.
321 bool SharedRuntime::is_wide_vector(int size) {
322 // Note, MaxVectorSize == 8 on SPARC.
323 assert(size <= 8, "%d-byte vectors are not supported", size);
324 return size > 8;
325 }
326
327 size_t SharedRuntime::trampoline_size() {
328 return 40;
329 }
330
331 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
332 __ set((intptr_t)destination, G3_scratch);
333 __ JMP(G3_scratch, 0);
334 __ delayed()->nop();
335 }
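// Note on sizing (editorial, hedged): trampoline_size() reserves 40 bytes,
// more than the three instructions visible here, presumably because a
// 64-bit set() can expand into a multi-instruction sethi/or/shift sequence;
// the reservation covers that worst case plus the jmp and delay-slot nop.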
336
393 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
394 VMRegPair *regs,
395 int total_args_passed,
396 int is_outgoing) {
397 assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
398
399 const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
400 const int flt_reg_max = 8;
401
402 int int_reg = 0;
403 int flt_reg = 0;
404 int slot = 0;
405
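// Example mapping (a sketch, not exhaustive): for a signature
// (int, long, float, double) on LP64 this loop yields roughly
//   int    -> O0/I0 (set1)      long   -> O1/I1 (set2)
//   float  -> F0    (set1)      double -> F2:F3 (set2, even-aligned)
// with stack slots handed out only once the register quotas run dry.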
406 for (int i = 0; i < total_args_passed; i++) {
407 switch (sig_bt[i]) {
408 case T_INT:
409 case T_SHORT:
410 case T_CHAR:
411 case T_BYTE:
412 case T_BOOLEAN:
413 #ifndef _LP64
414 case T_OBJECT:
415 case T_ARRAY:
416 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
417 #endif // _LP64
418 if (int_reg < int_reg_max) {
419 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
420 regs[i].set1(r->as_VMReg());
421 } else {
422 regs[i].set1(VMRegImpl::stack2reg(slot++));
423 }
424 break;
425
426 #ifdef _LP64
427 case T_LONG:
428 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
429 // fall-through
430 case T_OBJECT:
431 case T_ARRAY:
432 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
433 if (int_reg < int_reg_max) {
434 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
435 regs[i].set2(r->as_VMReg());
436 } else {
437 slot = round_to(slot, 2); // align
438 regs[i].set2(VMRegImpl::stack2reg(slot));
439 slot += 2;
440 }
441 break;
442 #else
443 case T_LONG:
444 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
445 // On 32-bit SPARC always put longs on the stack to keep the pressure off
446 // the integer argument registers; those should be used for oops.
447 slot = round_to(slot, 2); // align
448 regs[i].set2(VMRegImpl::stack2reg(slot));
449 slot += 2;
450 #endif
451 break;
452
453 case T_FLOAT:
454 if (flt_reg < flt_reg_max) {
455 FloatRegister r = as_FloatRegister(flt_reg++);
456 regs[i].set1(r->as_VMReg());
457 } else {
458 regs[i].set1(VMRegImpl::stack2reg(slot++));
459 }
460 break;
461
462 case T_DOUBLE:
463 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
464 if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
465 flt_reg = round_to(flt_reg, 2); // align
466 FloatRegister r = as_FloatRegister(flt_reg);
467 regs[i].set2(r->as_VMReg());
468 flt_reg += 2;
469 } else {
470 slot = round_to(slot, 2); // align
537
538
539 // Patch the caller's callsite with the entry to compiled code if it exists.
540 void AdapterGenerator::patch_callers_callsite() {
541 Label L;
542 __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
543 __ br_null(G3_scratch, false, Assembler::pt, L);
544 __ delayed()->nop();
545 // Call into the VM to patch the caller, then jump to compiled callee
546 __ save_frame(4); // Args in compiled layout; do not blow them
547
548 // Must save all the live Gregs; the list is:
549 // G1: 1st Long arg (32bit build)
550 // G2: global allocated to TLS
551 // G3: used in inline cache check (scratch)
552 // G4: 2nd Long arg (32bit build)
553 // G5: used in inline cache check (Method*)
554
555 // The longs must go to the stack by hand since in the 32-bit build they can be trashed by window ops.
556
557 #ifdef _LP64
558 // mov(s,d)
559 __ mov(G1, L1);
560 __ mov(G4, L4);
561 __ mov(G5_method, L5);
562 __ mov(G5_method, O0); // VM needs target method
563 __ mov(I7, O1); // VM needs caller's callsite
564 // Must be a leaf call...
565 // can be very far once the blob has been relocated
566 AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
567 __ relocate(relocInfo::runtime_call_type);
568 __ jumpl_to(dest, O7, O7);
569 __ delayed()->mov(G2_thread, L7_thread_cache);
570 __ mov(L7_thread_cache, G2_thread);
571 __ mov(L1, G1);
572 __ mov(L4, G4);
573 __ mov(L5, G5_method);
574 #else
575 __ stx(G1, FP, -8 + STACK_BIAS);
576 __ stx(G4, FP, -16 + STACK_BIAS);
577 __ mov(G5_method, L5);
578 __ mov(G5_method, O0); // VM needs target method
579 __ mov(I7, O1); // VM needs caller's callsite
580 // Must be a leaf call...
581 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
582 __ delayed()->mov(G2_thread, L7_thread_cache);
583 __ mov(L7_thread_cache, G2_thread);
584 __ ldx(FP, -8 + STACK_BIAS, G1);
585 __ ldx(FP, -16 + STACK_BIAS, G4);
586 __ mov(L5, G5_method);
587 #endif /* _LP64 */
588
589 __ restore(); // Restore args
590 __ bind(L);
591 }
592
593
594 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
595 RegisterOrConstant roc(arg_offset(st_off));
596 return __ ensure_simm13_or_reg(roc, Rdisp);
597 }
598
599 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
600 RegisterOrConstant roc(next_arg_offset(st_off));
601 return __ ensure_simm13_or_reg(roc, Rdisp);
602 }
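// Background (editorial): SPARC load/store displacements are signed 13-bit
// immediates (-4096..4095), so larger offsets cannot be encoded directly.
// ensure_simm13_or_reg() returns the constant unchanged when it fits and
// otherwise materializes it into Rdisp, letting the memory access fall back
// to a register-plus-register addressing mode.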
603
604
605 // Stores a long into the offset pointed to by base
606 void AdapterGenerator::store_c2i_long(Register r, Register base,
607 const int st_off, bool is_stack) {
608 #ifdef _LP64
609 // In V9, longs are given 2 64-bit slots in the interpreter, but the
610 // data is passed in only 1 slot.
611 __ stx(r, base, next_arg_slot(st_off));
612 #else
613 #ifdef COMPILER2
614 // Misaligned store of 64-bit data
615 __ stw(r, base, arg_slot(st_off)); // lo bits
616 __ srlx(r, 32, r);
617 __ stw(r, base, next_arg_slot(st_off)); // hi bits
618 #else
619 if (is_stack) {
620 // Misaligned store of 64-bit data
621 __ stw(r, base, arg_slot(st_off)); // lo bits
622 __ srlx(r, 32, r);
623 __ stw(r, base, next_arg_slot(st_off)); // hi bits
624 } else {
625 __ stw(r->successor(), base, arg_slot(st_off) ); // lo bits
626 __ stw(r , base, next_arg_slot(st_off)); // hi bits
627 }
628 #endif // COMPILER2
629 #endif // _LP64
630 }
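// Layout sketch for the cases above: the V9 path writes all 64 bits with a
// single stx into one of the long's two interpreter slots (next_arg_slot),
// leaving the companion slot as padding; the V8 paths instead split the
// value into two 32-bit stw halves, with COMPILER2 always taking the
// misaligned-split route.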
631
632 void AdapterGenerator::store_c2i_object(Register r, Register base,
633 const int st_off) {
634 __ st_ptr (r, base, arg_slot(st_off));
635 }
636
637 void AdapterGenerator::store_c2i_int(Register r, Register base,
638 const int st_off) {
639 __ st (r, base, arg_slot(st_off));
640 }
641
642 // Stores a double into the offset pointed to by base
643 void AdapterGenerator::store_c2i_double(VMReg r_2,
644 VMReg r_1, Register base, const int st_off) {
645 #ifdef _LP64
646 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
647 // data is passed in only 1 slot.
648 __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
649 #else
650 // Need to marshal 64-bit value from misaligned Lesp loads
651 __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
652 __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
653 #endif
654 }
655
656 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
657 const int st_off) {
658 __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
659 }
660
661 void AdapterGenerator::gen_c2i_adapter(
662 int total_args_passed,
663 // VMReg max_arg,
664 int comp_args_on_stack, // VMRegStackSlots
665 const BasicType *sig_bt,
666 const VMRegPair *regs,
667 Label& L_skip_fixup) {
668
669 // Before we get into the guts of the C2I adapter, see if we should be here
670 // at all. We've come from compiled code and are attempting to jump to the
671 // interpreter, which means the caller made a static call to get here
672 // (vcalls always get a compiled target if there is one). Check for a
673 // compiled target. If there is one, we need to patch the caller's call.
940
941 // Load in argument order going down.
942 const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
943 set_Rdisp(G1_scratch);
944
945 VMReg r_1 = regs[i].first();
946 VMReg r_2 = regs[i].second();
947 if (!r_1->is_valid()) {
948 assert(!r_2->is_valid(), "");
949 continue;
950 }
951 if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9
952 r_1 = F8->as_VMReg(); // as part of the load/store shuffle
953 if (r_2->is_valid()) r_2 = r_1->next();
954 }
955 if (r_1->is_Register()) { // Register argument
956 Register r = r_1->as_Register()->after_restore();
957 if (!r_2->is_valid()) {
958 __ ld(Gargs, arg_slot(ld_off), r);
959 } else {
960 #ifdef _LP64
961 // In V9, longs are given 2 64-bit slots in the interpreter, but the
962 // data is passed in only 1 slot.
963 RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
964 next_arg_slot(ld_off) : arg_slot(ld_off);
965 __ ldx(Gargs, slot, r);
966 #else
967 fatal("longs should be on stack");
968 #endif
969 }
970 } else {
971 assert(r_1->is_FloatRegister(), "");
972 if (!r_2->is_valid()) {
973 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
974 } else {
975 #ifdef _LP64
976 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
977 // data is passed in only 1 slot. This code also handles longs that
978 // are passed on the stack, but need a stack-to-stack move through a
979 // spare float register.
980 RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
981 next_arg_slot(ld_off) : arg_slot(ld_off);
982 __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
983 #else
984 // Need to marshal 64-bit value from misaligned Lesp loads
985 __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
986 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
987 #endif
988 }
989 }
990 // Was the argument really intended to be on the stack, but was loaded
991 // into F8/F9?
992 if (regs[i].first()->is_stack()) {
993 assert(r_1->as_FloatRegister() == F8, "fix this code");
994 // Convert stack slot to an SP offset
995 int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
996 // Store down the shuffled stack word. Target address _is_ aligned.
997 RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
998 if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
999 else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
1000 }
1001 }
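// Shuffle sketch for the loop above: an argument that is stack-resident on
// both sides never touches an integer register; even integer-typed values
// are bounced through F8 (or F8:F9 for 64-bit data) with an ldf from Gargs
// followed by an stf to the SP-relative slot, so a stack-to-stack move
// degenerates to one float load plus one float store.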
1002
1003 // Jump to the compiled code just as if compiled code was doing it.
1004 __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
1005 #if INCLUDE_JVMCI
1006 if (EnableJVMCI) {
1007 // check if this call should be routed towards a specific entry point
1140
1141 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1142 VMRegPair *regs,
1143 VMRegPair *regs2,
1144 int total_args_passed) {
1145 assert(regs2 == NULL, "not needed on sparc");
1146
1147 // Return the number of VMReg stack_slots needed for the args.
1148 // This value does not include an abi space (like register window
1149 // save area).
1150
1151 // The native convention is V8 if !LP64.
1152 // The LP64 convention is the V9 convention, which is slightly more sane.
1153
1154 // We return the amount of VMReg stack slots we need to reserve for all
1155 // the arguments NOT counting out_preserve_stack_slots. Since we always
1156 // have space for storing at least 6 registers to memory we start with that.
1157 // See int_stk_helper for a further discussion.
1158 int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
1159
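// Sizing sketch (editorial; the exact constants are platform-defined):
// frame::varargs_offset * VMRegImpl::slots_per_word covers the six
// O-register homes the SPARC frame always provides, and subtracting
// out_preserve_stack_slots() leaves the net argument area; the loop below
// only ever deepens this starting value.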
1160 #ifdef _LP64
1161 // V9 convention: All things "as-if" on double-wide stack slots.
1162 // Hoist any int/ptr/long's in the first 6 to int regs.
1163 // Hoist any flt/dbl's in the first 16 dbl regs.
1164 int j = 0; // Count of actual args, not HALVES
1165 VMRegPair param_array_reg; // location of the argument in the parameter array
1166 for (int i = 0; i < total_args_passed; i++, j++) {
1167 param_array_reg.set_bad();
1168 switch (sig_bt[i]) {
1169 case T_BOOLEAN:
1170 case T_BYTE:
1171 case T_CHAR:
1172 case T_INT:
1173 case T_SHORT:
1174 regs[i].set1(int_stk_helper(j));
1175 break;
1176 case T_LONG:
1177 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
1178 case T_ADDRESS: // raw pointers, like current thread, for VM calls
1179 case T_ARRAY:
1180 case T_OBJECT:
1224 case T_VOID:
1225 regs[i].set_bad();
1226 j--;
1227 break; // Do not count HALVES
1228 default:
1229 ShouldNotReachHere();
1230 }
1231 // Keep track of the deepest parameter array slot.
1232 if (!param_array_reg.first()->is_valid()) {
1233 param_array_reg = regs[i];
1234 }
1235 if (param_array_reg.first()->is_stack()) {
1236 int off = param_array_reg.first()->reg2stack();
1237 if (off > max_stack_slots) max_stack_slots = off;
1238 }
1239 if (param_array_reg.second()->is_stack()) {
1240 int off = param_array_reg.second()->reg2stack();
1241 if (off > max_stack_slots) max_stack_slots = off;
1242 }
1243 }
1244
1245 #else // _LP64
1246 // V8 convention: first 6 things in O-regs, rest on stack.
1247 // Alignment is willy-nilly.
1248 for (int i = 0; i < total_args_passed; i++) {
1249 switch (sig_bt[i]) {
1250 case T_ADDRESS: // raw pointers, like current thread, for VM calls
1251 case T_ARRAY:
1252 case T_BOOLEAN:
1253 case T_BYTE:
1254 case T_CHAR:
1255 case T_FLOAT:
1256 case T_INT:
1257 case T_OBJECT:
1258 case T_METADATA:
1259 case T_SHORT:
1260 regs[i].set1(int_stk_helper(i));
1261 break;
1262 case T_DOUBLE:
1263 case T_LONG:
1264 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1265 regs[i].set_pair(int_stk_helper(i + 1), int_stk_helper(i));
1266 break;
1267 case T_VOID: regs[i].set_bad(); break;
1268 default:
1269 ShouldNotReachHere();
1270 }
1271 if (regs[i].first()->is_stack()) {
1272 int off = regs[i].first()->reg2stack();
1273 if (off > max_stack_slots) max_stack_slots = off;
1274 }
1275 if (regs[i].second()->is_stack()) {
1276 int off = regs[i].second()->reg2stack();
1277 if (off > max_stack_slots) max_stack_slots = off;
1278 }
1279 }
1280 #endif // _LP64
1281
1282 return round_to(max_stack_slots + 1, 2);
1283
1284 }
1285
1286
1287 // ---------------------------------------------------------------------------
1288 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1289 switch (ret_type) {
1290 case T_FLOAT:
1291 __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1292 break;
1293 case T_DOUBLE:
1294 __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1295 break;
1296 }
1297 }
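// Offset sketch (assuming 4-byte VMRegImpl stack slots): with frame_slots
// == N, a float result lands at SP + N*4 - 4 + STACK_BIAS and a double at
// SP + N*4 - 8 + STACK_BIAS, i.e. in the topmost one or two slots of the
// frame, which restore_native_result() below presumably reads back
// symmetrically.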
1298
1299 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1300 switch (ret_type) {
1301 case T_FLOAT:
1389 }
1390
1391
1392 // An oop arg. Must pass a handle, not the oop itself
1393 static void object_move(MacroAssembler* masm,
1394 OopMap* map,
1395 int oop_handle_offset,
1396 int framesize_in_slots,
1397 VMRegPair src,
1398 VMRegPair dst,
1399 bool is_receiver,
1400 int* receiver_offset) {
1401
1402 // must pass a handle. First figure out the location we use as a handle
1403
1404 if (src.first()->is_stack()) {
1405 // Oop is already on the stack
1406 Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1407 __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1408 __ ld_ptr(rHandle, 0, L4);
1409 #ifdef _LP64
1410 __ movr( Assembler::rc_z, L4, G0, rHandle );
1411 #else
1412 __ tst( L4 );
1413 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1414 #endif
1415 if (dst.first()->is_stack()) {
1416 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1417 }
1418 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1419 if (is_receiver) {
1420 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1421 }
1422 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1423 } else {
1424 // Oop is in an input register; we must flush it to the stack
1425 const Register rOop = src.first()->as_Register();
1426 const Register rHandle = L5;
1427 int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1428 int offset = oop_slot * VMRegImpl::stack_slot_size;
1429 __ st_ptr(rOop, SP, offset + STACK_BIAS);
1430 if (is_receiver) {
1431 *receiver_offset = offset;
1432 }
1433 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1434 __ add(SP, offset + STACK_BIAS, rHandle);
1435 #ifdef _LP64
1436 __ movr( Assembler::rc_z, rOop, G0, rHandle );
1437 #else
1438 __ tst( rOop );
1439 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1440 #endif
1441
1442 if (dst.first()->is_stack()) {
1443 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1444 } else {
1445 __ mov(rHandle, dst.first()->as_Register());
1446 }
1447 }
1448 }
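// Null-handling sketch: in both arms above, the conditional move
// (movr on LP64, tst/movcc on V8) overwrites the handle with G0 (NULL)
// whenever the oop itself is NULL, so native code receives a NULL jobject
// rather than a handle pointing at a NULL stack slot.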
1449
1450 // A float arg may have to do a float-reg to int-reg conversion
1451 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1452 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1453
1454 if (src.first()->is_stack()) {
1455 if (dst.first()->is_stack()) {
1456 // stack to stack the easiest of the bunch
1457 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1458 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1459 } else {
1460 // stack to reg
2051 __ delayed()->mov(obj_reg, hash);
2052 }
2053
2054 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
2055 // We depend on hash_mask being at most 32 bits and avoid the use of
2056 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
2057 // vm: see markOop.hpp.
2058 __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
2059 __ sethi(markOopDesc::hash_mask, mask);
2060 __ btst(markOopDesc::unlocked_value, header);
2061 __ br(Assembler::zero, false, Assembler::pn, slowCase);
2062 if (UseBiasedLocking) {
2063 // Check if biased and fall through to runtime if so
2064 __ delayed()->nop();
2065 __ btst(markOopDesc::biased_lock_bit_in_place, header);
2066 __ br(Assembler::notZero, false, Assembler::pn, slowCase);
2067 }
2068 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
2069
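// Mask-building sketch: sethi() can only set the upper 22 bits of 'mask',
// so the delay-slot or3() above supplies the low 10 bits
// (markOopDesc::hash_mask & 0x3ff); together they materialize the full
// (at most 32-bit) hash_mask without a more expensive 64-bit set().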
2070 // Check for a valid (non-zero) hash code and get its value.
2071 #ifdef _LP64
2072 __ srlx(header, markOopDesc::hash_shift, hash);
2073 #else
2074 __ srl(header, markOopDesc::hash_shift, hash);
2075 #endif
2076 __ andcc(hash, mask, hash);
2077 __ br(Assembler::equal, false, Assembler::pn, slowCase);
2078 __ delayed()->nop();
2079
2080 // leaf return.
2081 __ bind(done);
2082 __ retl();
2083 __ delayed()->mov(hash, result);
2084 __ bind(slowCase);
2085 }
2086 #endif // COMPILER1
2087
2088
2089 // We have received a description of where all the java args are located
2090 // on entry to the wrapper. We need to convert these args to where
2091 // the jni function will expect them. To figure out where they go
2092 // we convert the java signature to a C signature by inserting
2093 // the hidden arguments as arg[0] and possibly arg[1] (static method)
2094
2095 const int total_in_args = method->size_of_parameters();
2391 if (method->is_static() && !is_critical_native) {
2392 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1);
2393
2394 // Now handlize the static class mirror in O1. It's known not-null.
2395 __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
2396 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2397 __ add(SP, klass_offset + STACK_BIAS, O1);
2398 }
2399
2400
2401 const Register L6_handle = L6;
2402
2403 if (method->is_synchronized()) {
2404 assert(!is_critical_native, "unhandled");
2405 __ mov(O1, L6_handle);
2406 }
2407
2408 // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
2409 // except O6/O7, so if we must call out we must push a new frame. We do that
2410 // immediately and flush the windows.
2411 #ifdef _LP64
2412 intptr_t thepc = (intptr_t) __ pc();
2413 {
2414 address here = __ pc();
2415 // Call the next instruction
2416 __ call(here + 8, relocInfo::none);
2417 __ delayed()->nop();
2418 }
2419 #else
2420 intptr_t thepc = __ load_pc_address(O7, 0);
2421 #endif /* _LP64 */
2422
2423 // We use the same pc/oopMap repeatedly when we call out
2424 oop_maps->add_gc_map(thepc - start, map);
2425
2426 // O7 now has the pc loaded that we will use when we finally call to native.
2427
2428 // Save thread in L7; it crosses a bunch of VM calls below.
2429 // Don't use save_thread because it smashes G2 and we merely
2430 // want to save a copy.
2431 __ mov(G2_thread, L7_thread_cache);
2432
2433
2434 // If we create an inner frame, once is plenty;
2435 // when we create it we must also save G2_thread.
2436 bool inner_frame_created = false;
2437
2438 // dtrace method entry support
2439 {
2440 SkipIfEqual skip_if(
2441 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
2536 // either as the flush traps and the current window goes too.
2537 __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2538 __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2539 }
2540
2541 // get JNIEnv* which is first argument to native
2542 if (!is_critical_native) {
2543 __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2544 }
2545
2546 // Use that pc we placed in O7 a while back as the current frame anchor
2547 __ set_last_Java_frame(SP, O7);
2548
2549 // We flushed the windows ages ago; now mark them as flushed before transitioning.
2550 __ set(JavaFrameAnchor::flushed, G3_scratch);
2551 __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2552
2553 // Transition from _thread_in_Java to _thread_in_native.
2554 __ set(_thread_in_native, G3_scratch);
2555
2556 #ifdef _LP64
2557 AddressLiteral dest(native_func);
2558 __ relocate(relocInfo::runtime_call_type);
2559 __ jumpl_to(dest, O7, O7);
2560 #else
2561 __ call(native_func, relocInfo::runtime_call_type);
2562 #endif
2563 __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2564
2565 __ restore_thread(L7_thread_cache); // restore G2_thread
2566
2567 // Unpack native results. For int-types, we do any needed sign-extension
2568 // and move things into I0. The return value there will survive any VM
2569 // calls for blocking or unlocking. An FP or OOP result (handle) is done
2570 // specially in the slow-path code.
2571 switch (ret_type) {
2572 case T_VOID: break; // Nothing to do!
2573 case T_FLOAT: break; // Got it where we want it (unless slow-path)
2574 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
2575 // In the 64-bit build the result is in O0; in the 32-bit build it is in O0, O1
2576 case T_LONG:
2577 #ifndef _LP64
2578 __ mov(O1, I1);
2579 #endif
2580 // Fall thru
2581 case T_OBJECT: // Really a handle
2582 case T_ARRAY:
2583 case T_INT:
2584 __ mov(O0, I0);
2585 break;
2586 case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2587 case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break;
2588 case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value!
2589 case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break;
2590 // Cannot de-handlize until after reclaiming jvm_lock
2591 default:
2592 ShouldNotReachHere();
2593 }
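// Normalization sketch for the T_BOOLEAN case above: subcc(G0, O0, G0)
// computes 0 - O0 and sets the carry (borrow) flag iff O0 != 0, and
// addc(G0, 0, I0) copies that carry into I0, canonicalizing any non-zero
// native return value to 1 and zero to 0.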
2594
2595 Label after_transition;
2596 // must we block?
2597
2598 // Block, if necessary, before resuming in _thread_in_Java state.
2599 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2765 __ verify_oop(I0);
2766 }
2767
2768 if (CheckJNICalls) {
2769 // clear_pending_jni_exception_check
2770 __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset());
2771 }
2772
2773 if (!is_critical_native) {
2774 // reset handle block
2775 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
2776 __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
2777
2778 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
2779 check_forward_pending_exception(masm, G3_scratch);
2780 }
2781
2782
2783 // Return
2784
2785 #ifndef _LP64
2786 if (ret_type == T_LONG) {
2787
2788 // Must leave proper result in O0,O1 and G1 (c2/tiered only)
2789 __ sllx(I0, 32, G1); // Shift bits into high G1
2790 __ srl (I1, 0, I1); // Zero extend I1 (harmless?)
2791 __ or3 (I1, G1, G1); // OR 64 bits into G1
2792 }
2793 #endif
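// Packing sketch for the block above: sllx(I0, 32, G1) moves the high word
// into bits 63..32 of G1, srl(I1, 0, I1) zero-extends the low word, and
// or3 merges the two, so c2/tiered callers see the whole 64-bit long in G1
// while I0/I1 still hold its two 32-bit halves.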
2794
2795 __ ret();
2796 __ delayed()->restore();
2797
2798 __ flush();
2799
2800 nmethod *nm = nmethod::new_native_nmethod(method,
2801 compile_id,
2802 masm->code(),
2803 vep_offset,
2804 frame_complete,
2805 stack_slots / VMRegImpl::slots_per_word,
2806 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2807 in_ByteSize(lock_offset),
2808 oop_maps);
2809
2810 if (is_critical_native) {
2811 nm->set_lazy_critical_native(true);
2812 }
2813 return nm;
2814
2836
2837 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
2838 //
2839 // Common out the new frame generation for deopt and uncommon trap
2840 //
2841 Register G3pcs = G3_scratch; // Array of new pcs (input)
2842 Register Oreturn0 = O0;
2843 Register Oreturn1 = O1;
2844 Register O2UnrollBlock = O2;
2845 Register O3array = O3; // Array of frame sizes (input)
2846 Register O4array_size = O4; // number of frames (input)
2847 Register O7frame_size = O7; // size of the current frame (scratch)
2848
2849 __ ld_ptr(O3array, 0, O7frame_size);
2850 __ sub(G0, O7frame_size, O7frame_size);
2851 __ save(SP, O7frame_size, SP);
2852 __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
2853
2854 #ifdef ASSERT
2855 // make sure that the frames are aligned properly
2856 #ifndef _LP64
2857 __ btst(wordSize*2-1, SP);
2858 __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
2859 #endif
2860 #endif
2861
2862 // Deopt needs to pass some extra live values from frame to frame
2863
2864 if (deopt) {
2865 __ mov(Oreturn0->after_save(), Oreturn0);
2866 __ mov(Oreturn1->after_save(), Oreturn1);
2867 }
2868
2869 __ mov(O4array_size->after_save(), O4array_size);
2870 __ sub(O4array_size, 1, O4array_size);
2871 __ mov(O3array->after_save(), O3array);
2872 __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
2873 __ add(G3pcs, wordSize, G3pcs); // point to next pc value
2874
2875 #ifdef ASSERT
2876 // trash registers to show a clear pattern in backtraces
2877 __ set(0xDEAD0000, I0);
2878 __ add(I0, 2, I1);
2879 __ add(I0, 4, I2);
2957 }
2958
2959 //------------------------------generate_deopt_blob----------------------------
2960 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2961 // instead.
2962 void SharedRuntime::generate_deopt_blob() {
2963 // allocate space for the code
2964 ResourceMark rm;
2965 // setup code generation tools
2966 int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
2967 #ifdef ASSERT
2968 if (UseStackBanging) {
2969 pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2970 }
2971 #endif
2972 #if INCLUDE_JVMCI
2973 if (EnableJVMCI) {
2974 pad += 1000; // Increase the buffer size when compiling for JVMCI
2975 }
2976 #endif
2977 #ifdef _LP64
2978 CodeBuffer buffer("deopt_blob", 2100+pad, 512);
2979 #else
2980 // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
2981 // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
2982 CodeBuffer buffer("deopt_blob", 1600+pad, 512);
2983 #endif /* _LP64 */
2984 MacroAssembler* masm = new MacroAssembler(&buffer);
2985 FloatRegister Freturn0 = F0;
2986 Register Greturn1 = G1;
2987 Register Oreturn0 = O0;
2988 Register Oreturn1 = O1;
2989 Register O2UnrollBlock = O2;
2990 Register L0deopt_mode = L0;
2991 Register G4deopt_mode = G4_scratch;
2992 int frame_size_words;
2993 Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
2994 #if !defined(_LP64) && defined(COMPILER2)
2995 Address saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
2996 #endif
2997 Label cont;
2998
2999 OopMapSet *oop_maps = new OopMapSet();
3000
3001 //
3002 // This is the entry point for code which is returning to a de-optimized
3003 // frame.
3004 // The steps taken by this frame are as follows:
3005 // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
3006 // and all potentially live registers (at a pollpoint many registers can be live).
3007 //
3008 // - call the C routine: Deoptimization::fetch_unroll_info (this function
3009 // returns information about the number and size of interpreter frames
3010 // which are equivalent to the frame which is being deoptimized)
3011 // - deallocate the unpack frame, restoring only result values. Other
3012 // volatile registers will now be captured in the vframeArray as needed.
3013 // - deallocate the deoptimization frame
3014 // - in a loop using the information returned in the previous step
3015 // push new interpreter frames (take care to propagate the return
3016 // values through each new frame pushed)
3188 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
3189 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
3190 __ bind(noException);
3191
3192 // deallocate the deoptimization frame taking care to preserve the return values
3193 __ mov(Oreturn0, Oreturn0->after_save());
3194 __ mov(Oreturn1, Oreturn1->after_save());
3195 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
3196 __ restore();
3197
3198 // Allocate new interpreter frame(s) and possible c2i adapter frame
3199
3200 make_new_frames(masm, true);
3201
3202 // push a dummy "unpack_frame" taking care of float return values and
3203 // call Deoptimization::unpack_frames to have the unpacker layout
3204 // information in the interpreter frames just created and then return
3205 // to the interpreter entry point
3206 __ save(SP, -frame_size_words*wordSize, SP);
3207 __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
3208 #if !defined(_LP64)
3209 #if defined(COMPILER2)
3210 // In the 32-bit build C2 returns longs in a single register, G1
3211 __ stx(Greturn1, saved_Greturn1_addr);
3212 #endif
3213 __ set_last_Java_frame(SP, noreg);
3214 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
3215 #else
3216 // LP64 uses g4 in set_last_Java_frame
3217 __ mov(G4deopt_mode, O1);
3218 __ set_last_Java_frame(SP, G0);
3219 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
3220 #endif
3221 __ reset_last_Java_frame();
3222 __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
3223
3224 #if !defined(_LP64) && defined(COMPILER2)
3225 // In the 32-bit build, C2 returns longs in G1, so restore the saved G1 into
3226 // I0/I1 if the return value is long.
3227 Label not_long;
3228 __ cmp_and_br_short(O0, T_LONG, Assembler::notEqual, Assembler::pt, not_long);
3229 __ ldd(saved_Greturn1_addr, I0);
3230 __ bind(not_long);
3231 #endif
3232 __ ret();
3233 __ delayed()->restore();
3234
3235 masm->flush();
3236 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
3237 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3238 #if INCLUDE_JVMCI
3239 if (EnableJVMCI) {
3240 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3241 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3242 }
3243 #endif
3244 }
3245
3246 #ifdef COMPILER2
3247
3248 //------------------------------generate_uncommon_trap_blob--------------------
3249 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3250 // instead.
3251 void SharedRuntime::generate_uncommon_trap_blob() {
3252 // allocate space for the code
3253 ResourceMark rm;
3254 // setup code generation tools
3255 int pad = VerifyThread ? 512 : 0;
3256 #ifdef ASSERT
3257 if (UseStackBanging) {
3258 pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
3259 }
3260 #endif
3261 #ifdef _LP64
3262 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
3263 #else
3264 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
3265 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
3266 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
3267 #endif
3268 MacroAssembler* masm = new MacroAssembler(&buffer);
3269 Register O2UnrollBlock = O2;
3270 Register O2klass_index = O2;
3271
3272 //
3273 // This is the entry point for all traps the compiler takes when it thinks
3274 // it cannot handle further execution of compiled code. The frame is
3275 // deoptimized in these cases and converted into interpreter frames for
3276 // execution.
3277 // The steps taken by this frame are as follows:
3278 // - push a fake "unpack_frame"
3279 // - call the C routine Deoptimization::uncommon_trap (this function
3280 // packs the current compiled frame into vframe arrays and returns
3281 // information about the number and size of interpreter frames which
3282 // are equivalent to the frame which is being deoptimized)
3283 // - deallocate the "unpack_frame"
3284 // - deallocate the deoptimization frame
3285 // - in a loop using the information returned in the previous step
3286 // push interpreter frames;
3287 // - create a dummy "unpack_frame"
1 /*
2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
110
111 static void restore_result_registers(MacroAssembler* masm);
112 };
113
114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
115 // Record volatile registers as callee-save values in an OopMap so their save locations will be
116 // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
117 // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
118 // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
119 // (as the stub's I's) when the runtime routine called by the stub creates its frame.
120 int i;
121 // Always make the frame size 16 byte aligned.
122 int frame_size = round_to(additional_frame_words + register_save_size, 16);
123 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
124 int frame_size_in_slots = frame_size / sizeof(jint);
125 // CodeBlob frame size is in words.
126 *total_frame_words = frame_size / wordSize;
127 // OopMap* map = new OopMap(*total_frame_words, 0);
128 OopMap* map = new OopMap(frame_size_in_slots, 0);
129
130 __ save(SP, -frame_size, SP);
131
132
133 int debug_offset = 0;
134 // Save the G's
135 __ stx(G1, SP, g1_offset+STACK_BIAS);
136 map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
137
138 __ stx(G3, SP, g3_offset+STACK_BIAS);
139 map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
140
141 __ stx(G4, SP, g4_offset+STACK_BIAS);
142 map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
143
144 __ stx(G5, SP, g5_offset+STACK_BIAS);
145 map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
146
147 // This is really a waste but we'll keep things as they were for now
148 if (true) {
149 }
150
151
152 // Save the flags
153 __ rdccr( G5 );
154 __ stx(G5, SP, ccr_offset+STACK_BIAS);
155 __ stxfsr(SP, fsr_offset+STACK_BIAS);
156
157 // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
158 int offset = d00_offset;
159 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
160 FloatRegister f = as_FloatRegister(i);
161 __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
162 // Record as callee saved both halves of double registers (2 float registers).
163 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
164 map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
165 offset += sizeof(double);
166 }
167
168 // And we're done.
175 // saved.
176 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
177
178 // Restore all the FP registers
179 for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
180 __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
181 }
182
183 __ ldx(SP, ccr_offset+STACK_BIAS, G1);
184 __ wrccr (G1) ;
185
186 // Restore the G's
187 // Note that G2 (AKA GThread) must be saved and restored separately.
188 // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
189
190 __ ldx(SP, g1_offset+STACK_BIAS, G1);
191 __ ldx(SP, g3_offset+STACK_BIAS, G3);
192 __ ldx(SP, g4_offset+STACK_BIAS, G4);
193 __ ldx(SP, g5_offset+STACK_BIAS, G5);
194
195 // Restore flags
196
197 __ ldxfsr(SP, fsr_offset+STACK_BIAS);
198
199 __ restore();
200
201 }
202
203 // Pop the current frame and restore the registers that might be holding
204 // a result.
205 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
206
207 __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
208
209 __ restore();
210
211 }
212
213 // Is the vector's size (in bytes) bigger than the size saved by default?
214 // 8-byte FP registers are saved by default on SPARC.
215 bool SharedRuntime::is_wide_vector(int size) {
216 // Note, MaxVectorSize == 8 on SPARC.
217 assert(size <= 8, "%d-byte vectors are not supported", size);
218 return size > 8;
219 }
220
221 size_t SharedRuntime::trampoline_size() {
222 return 40;
223 }
224
225 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
226 __ set((intptr_t)destination, G3_scratch);
227 __ JMP(G3_scratch, 0);
228 __ delayed()->nop();
229 }
230
287 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
288 VMRegPair *regs,
289 int total_args_passed,
290 int is_outgoing) {
291 assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
292
293 const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
294 const int flt_reg_max = 8;
295
296 int int_reg = 0;
297 int flt_reg = 0;
298 int slot = 0;
299
300 for (int i = 0; i < total_args_passed; i++) {
301 switch (sig_bt[i]) {
302 case T_INT:
303 case T_SHORT:
304 case T_CHAR:
305 case T_BYTE:
306 case T_BOOLEAN:
307 if (int_reg < int_reg_max) {
308 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
309 regs[i].set1(r->as_VMReg());
310 } else {
311 regs[i].set1(VMRegImpl::stack2reg(slot++));
312 }
313 break;
314
315 case T_LONG:
316 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
317 // fall-through
318 case T_OBJECT:
319 case T_ARRAY:
320 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
321 if (int_reg < int_reg_max) {
322 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
323 regs[i].set2(r->as_VMReg());
324 } else {
325 slot = round_to(slot, 2); // align
326 regs[i].set2(VMRegImpl::stack2reg(slot));
327 slot += 2;
328 }
329 break;
331
332 case T_FLOAT:
333 if (flt_reg < flt_reg_max) {
334 FloatRegister r = as_FloatRegister(flt_reg++);
335 regs[i].set1(r->as_VMReg());
336 } else {
337 regs[i].set1(VMRegImpl::stack2reg(slot++));
338 }
339 break;
340
341 case T_DOUBLE:
342 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
343 if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
344 flt_reg = round_to(flt_reg, 2); // align
345 FloatRegister r = as_FloatRegister(flt_reg);
346 regs[i].set2(r->as_VMReg());
347 flt_reg += 2;
348 } else {
349 slot = round_to(slot, 2); // align
416
417
418 // Patch the caller's callsite with the entry to compiled code if it exists.
419 void AdapterGenerator::patch_callers_callsite() {
420 Label L;
421 __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
422 __ br_null(G3_scratch, false, Assembler::pt, L);
423 __ delayed()->nop();
424 // Call into the VM to patch the caller, then jump to compiled callee
425 __ save_frame(4); // Args in compiled layout; do not blow them
426
427 // Must save all the live Gregs; the list is:
428 // G1: 1st Long arg (32bit build)
429 // G2: global allocated to TLS
430 // G3: used in inline cache check (scratch)
431 // G4: 2nd Long arg (32bit build)
432 // G5: used in inline cache check (Method*)
433
434 // The longs must go to the stack by hand since in the 32-bit build they can be trashed by window ops.
435
436 // mov(s,d)
437 __ mov(G1, L1);
438 __ mov(G4, L4);
439 __ mov(G5_method, L5);
440 __ mov(G5_method, O0); // VM needs target method
441 __ mov(I7, O1); // VM needs caller's callsite
442 // Must be a leaf call...
443 // can be very far once the blob has been relocated
444 AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
445 __ relocate(relocInfo::runtime_call_type);
446 __ jumpl_to(dest, O7, O7);
447 __ delayed()->mov(G2_thread, L7_thread_cache);
448 __ mov(L7_thread_cache, G2_thread);
449 __ mov(L1, G1);
450 __ mov(L4, G4);
451 __ mov(L5, G5_method);
452
453 __ restore(); // Restore args
454 __ bind(L);
455 }
456
457
458 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
459 RegisterOrConstant roc(arg_offset(st_off));
460 return __ ensure_simm13_or_reg(roc, Rdisp);
461 }
462
463 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
464 RegisterOrConstant roc(next_arg_offset(st_off));
465 return __ ensure_simm13_or_reg(roc, Rdisp);
466 }
467
468
469 // Stores a long into the offset pointed to by base
470 void AdapterGenerator::store_c2i_long(Register r, Register base,
471 const int st_off, bool is_stack) {
472 // In V9, longs are given 2 64-bit slots in the interpreter, but the
473 // data is passed in only 1 slot.
474 __ stx(r, base, next_arg_slot(st_off));
475 }
476
477 void AdapterGenerator::store_c2i_object(Register r, Register base,
478 const int st_off) {
479 __ st_ptr (r, base, arg_slot(st_off));
480 }
481
482 void AdapterGenerator::store_c2i_int(Register r, Register base,
483 const int st_off) {
484 __ st (r, base, arg_slot(st_off));
485 }
486
487 // Stores a double into the offset pointed to by base
488 void AdapterGenerator::store_c2i_double(VMReg r_2,
489 VMReg r_1, Register base, const int st_off) {
490 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
491 // data is passed in only 1 slot.
492 __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
493 }
494
495 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
496 const int st_off) {
497 __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
498 }
499
500 void AdapterGenerator::gen_c2i_adapter(
501 int total_args_passed,
502 // VMReg max_arg,
503 int comp_args_on_stack, // VMRegStackSlots
504 const BasicType *sig_bt,
505 const VMRegPair *regs,
506 Label& L_skip_fixup) {
507
508 // Before we get into the guts of the C2I adapter, see if we should be here
509 // at all. We've come from compiled code and are attempting to jump to the
510 // interpreter, which means the caller made a static call to get here
511 // (vcalls always get a compiled target if there is one). Check for a
512 // compiled target. If there is one, we need to patch the caller's call.
779
780 // Load in argument order going down.
781 const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
782 set_Rdisp(G1_scratch);
783
784 VMReg r_1 = regs[i].first();
785 VMReg r_2 = regs[i].second();
786 if (!r_1->is_valid()) {
787 assert(!r_2->is_valid(), "");
788 continue;
789 }
790 if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9
791 r_1 = F8->as_VMReg(); // as part of the load/store shuffle
792 if (r_2->is_valid()) r_2 = r_1->next();
793 }
794 if (r_1->is_Register()) { // Register argument
795 Register r = r_1->as_Register()->after_restore();
796 if (!r_2->is_valid()) {
797 __ ld(Gargs, arg_slot(ld_off), r);
798 } else {
799 // In V9, longs are given 2 64-bit slots in the interpreter, but the
800 // data is passed in only 1 slot.
801 RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
802 next_arg_slot(ld_off) : arg_slot(ld_off);
803 __ ldx(Gargs, slot, r);
804 }
805 } else {
806 assert(r_1->is_FloatRegister(), "");
807 if (!r_2->is_valid()) {
808 __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
809 } else {
810 // In V9, doubles are given 2 64-bit slots in the interpreter, but the
811 // data is passed in only 1 slot. This code also handles longs that
812 // are passed on the stack, but need a stack-to-stack move through a
813 // spare float register.
814 RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
815 next_arg_slot(ld_off) : arg_slot(ld_off);
816 __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
817 }
818 }
819 // Was the argument really intended to be on the stack, but was loaded
820 // into F8/F9?
821 if (regs[i].first()->is_stack()) {
822 assert(r_1->as_FloatRegister() == F8, "fix this code");
823 // Convert stack slot to an SP offset
824 int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
825 // Store down the shuffled stack word. Target address _is_ aligned.
826 RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
827 if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
828 else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
829 }
830 }
831
832 // Jump to the compiled code just as if compiled code was doing it.
833 __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
834 #if INCLUDE_JVMCI
835 if (EnableJVMCI) {
836 // check if this call should be routed towards a specific entry point
969
970 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
971 VMRegPair *regs,
972 VMRegPair *regs2,
973 int total_args_passed) {
974 assert(regs2 == NULL, "not needed on sparc");
975
976 // Return the number of VMReg stack_slots needed for the args.
977 // This value does not include ABI space (like the register window
978 // save area).
979
980 // The native convention is V8 if !LP64.
981 // The LP64 convention is the V9 convention, which is slightly more sane.
982
983 // We return the number of VMReg stack slots we need to reserve for all
984 // the arguments, NOT counting out_preserve_stack_slots. Since we always
985 // have space for storing at least 6 registers to memory we start with that.
986 // See int_stk_helper for a further discussion.
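// For illustration (V9/LP64 case, an assumption matching the comment
// above): the first six integer/pointer arguments travel in O0..O5, so a
// native entry such as
//
//   void native_func(JNIEnv* env, jobject recv, jint x);
//
// sees env in O0, recv in O1 and x in O2, while each argument also keeps
// an "as-if" home slot in the caller's parameter array on the stack.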
987 int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
988
989 // V9 convention: All things "as-if" on double-wide stack slots.
990 // Hoist any int/ptr/long's in the first 6 to int regs.
991 // Hoist any flt/dbl's in the first 16 dbl regs.
992 int j = 0; // Count of actual args, not HALVES
993 VMRegPair param_array_reg; // location of the argument in the parameter array
994 for (int i = 0; i < total_args_passed; i++, j++) {
995 param_array_reg.set_bad();
996 switch (sig_bt[i]) {
997 case T_BOOLEAN:
998 case T_BYTE:
999 case T_CHAR:
1000 case T_INT:
1001 case T_SHORT:
1002 regs[i].set1(int_stk_helper(j));
1003 break;
1004 case T_LONG:
1005 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); // fall through
1006 case T_ADDRESS: // raw pointers, like current thread, for VM calls
1007 case T_ARRAY:
1008 case T_OBJECT:
1052 case T_VOID:
1053 regs[i].set_bad();
1054 j--;
1055 break; // Do not count HALVES
1056 default:
1057 ShouldNotReachHere();
1058 }
1059 // Keep track of the deepest parameter array slot.
1060 if (!param_array_reg.first()->is_valid()) {
1061 param_array_reg = regs[i];
1062 }
1063 if (param_array_reg.first()->is_stack()) {
1064 int off = param_array_reg.first()->reg2stack();
1065 if (off > max_stack_slots) max_stack_slots = off;
1066 }
1067 if (param_array_reg.second()->is_stack()) {
1068 int off = param_array_reg.second()->reg2stack();
1069 if (off > max_stack_slots) max_stack_slots = off;
1070 }
1071 }
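// Presumably the +1 turns the deepest slot index into a slot count and the
// rounding keeps the area double-word aligned; e.g. a deepest slot of 6
// yields round_to(7, 2) == 8 reserved slots.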
1072 return round_to(max_stack_slots + 1, 2);
1073
1074 }
1075
1076
1077 // ---------------------------------------------------------------------------
1078 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1079 switch (ret_type) {
1080 case T_FLOAT:
1081 __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1082 break;
1083 case T_DOUBLE:
1084 __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1085 break;
1086 }
1087 }
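// Sketch of the addressing used in save_native_result above: the value is
// parked at the high end of the current frame,
//
//   addr = SP + STACK_BIAS + frame_slots * VMRegImpl::stack_slot_size - size
//
// with size == 4 for a float and 8 for a double, so restore_native_result
// below can reload it from the same spot after the VM call.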
1088
1089 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1090 switch (ret_type) {
1091 case T_FLOAT:
1179 }
1180
1181
1182 // An oop arg. Must pass a handle, not the oop itself.
1183 static void object_move(MacroAssembler* masm,
1184 OopMap* map,
1185 int oop_handle_offset,
1186 int framesize_in_slots,
1187 VMRegPair src,
1188 VMRegPair dst,
1189 bool is_receiver,
1190 int* receiver_offset) {
1191
1192 // must pass a handle. First figure out the location we use as a handle
1193
1194 if (src.first()->is_stack()) {
1195 // Oop is already on the stack
1196 Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1197 __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1198 __ ld_ptr(rHandle, 0, L4);
1199 __ movr( Assembler::rc_z, L4, G0, rHandle );
1200 if (dst.first()->is_stack()) {
1201 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1202 }
1203 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1204 if (is_receiver) {
1205 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1206 }
1207 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1208 } else {
1209 // Oop is in an input register; we must flush it to the stack.
1210 const Register rOop = src.first()->as_Register();
1211 const Register rHandle = L5;
1212 int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1213 int offset = oop_slot * VMRegImpl::stack_slot_size;
1214 __ st_ptr(rOop, SP, offset + STACK_BIAS);
1215 if (is_receiver) {
1216 *receiver_offset = offset;
1217 }
1218 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1219 __ add(SP, offset + STACK_BIAS, rHandle);
1220 __ movr( Assembler::rc_z, rOop, G0, rHandle );
1221
1222 if (dst.first()->is_stack()) {
1223 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1224 } else {
1225 __ mov(rHandle, dst.first()->as_Register());
1226 }
1227 }
1228 }
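// A rough sketch of the handle convention implemented above, in C terms:
//
//   intptr_t* slot = /* stack location holding the spilled oop */;
//   void*   handle = (*slot == 0) ? NULL : (void*) slot;   // movr(rc_z, ...)
//
// i.e. the callee receives the address of the stack slot holding the oop,
// except that a NULL oop is passed as a NULL handle rather than as a
// pointer to a slot containing NULL.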
1229
1230 // A float arg may have to move between a float register and an int register or stack slot.
1231 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1232 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1233
1234 if (src.first()->is_stack()) {
1235 if (dst.first()->is_stack()) {
1236 // stack to stack: the easiest of the bunch
1237 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1238 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1239 } else {
1240 // stack to reg
1831 __ delayed()->mov(obj_reg, hash);
1832 }
1833
1834 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
1835 // We depend on hash_mask being at most 32 bits and avoid the use of
1836 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
1837 // vm: see markOop.hpp.
1838 __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
1839 __ sethi(markOopDesc::hash_mask, mask);
1840 __ btst(markOopDesc::unlocked_value, header);
1841 __ br(Assembler::zero, false, Assembler::pn, slowCase);
1842 if (UseBiasedLocking) {
1843 // Check if biased and fall through to runtime if so
1844 __ delayed()->nop();
1845 __ btst(markOopDesc::biased_lock_bit_in_place, header);
1846 __ br(Assembler::notZero, false, Assembler::pn, slowCase);
1847 }
1848 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
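// Sketch of the mask construction: the sethi above materializes the upper
// 22 bits of the (at most 32-bit) hash_mask and the or3 in the delay slot
// fills in the low 10 bits, i.e. roughly
//
//   mask = (hash_mask & ~0x3ff) | (hash_mask & 0x3ff);   // == hash_mask
//
// split in two because SPARC immediates are only 13 bits wide.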
1849
1850 // Check for a valid (non-zero) hash code and get its value.
1851 __ srlx(header, markOopDesc::hash_shift, hash);
1852 __ andcc(hash, mask, hash);
1853 __ br(Assembler::equal, false, Assembler::pn, slowCase);
1854 __ delayed()->nop();
1855
1856 // leaf return.
1857 __ bind(done);
1858 __ retl();
1859 __ delayed()->mov(hash, result);
1860 __ bind(slowCase);
1861 }
1862 #endif // COMPILER1
1863
1864
1865 // We have received a description of where all the Java args are located
1866 // on entry to the wrapper. We need to convert these args to where
1867 // the JNI function will expect them. To figure out where they go
1868 // we convert the Java signature to a C signature by inserting
1869 // the hidden arguments as arg[0] and possibly arg[1] (for a static method).
1870
1871 const int total_in_args = method->size_of_parameters();
2167 if (method->is_static() && !is_critical_native) {
2168 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1);
2169
2170 // Now handlize the static class mirror in O1. It's known not-null.
2171 __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
2172 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2173 __ add(SP, klass_offset + STACK_BIAS, O1);
2174 }
2175
2176
2177 const Register L6_handle = L6;
2178
2179 if (method->is_synchronized()) {
2180 assert(!is_critical_native, "unhandled");
2181 __ mov(O1, L6_handle);
2182 }
2183
2184 // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
2185 // except O6/O7, so if we must call out we must push a new frame. We immediately
2186 // push a new frame and flush the windows.
2187 intptr_t thepc = (intptr_t) __ pc();
2188 {
2189 address here = __ pc();
2190 // Call the instruction after the delay slot to capture the current pc in O7
2191 __ call(here + 8, relocInfo::none);
2192 __ delayed()->nop();
2193 }
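// Sketch of the idiom above: on SPARC a 'call' deposits its own address in
// O7, so calling the instruction that follows the delay slot (here + 8) is
// in effect
//
//   O7 = current_pc;   // then simply fall through
//
// with no real transfer of control.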
2194
2195 // We use the same pc/oopMap repeatedly when we call out
2196 oop_maps->add_gc_map(thepc - start, map);
2197
2198 // O7 now has the pc loaded that we will use when we finally call to native.
2199
2200 // Save thread in L7; it crosses a bunch of VM calls below.
2201 // Don't use save_thread because it smashes G2 and we merely
2202 // want to save a copy.
2203 __ mov(G2_thread, L7_thread_cache);
2204
2205
2206 // If we create an inner frame, once is plenty;
2207 // when we create it we must also save G2_thread.
2208 bool inner_frame_created = false;
2209
2210 // dtrace method entry support
2211 {
2212 SkipIfEqual skip_if(
2213 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
2308 // either as the flush traps and the current window goes too.
2309 __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2310 __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2311 }
2312
2313 // get JNIEnv* which is first argument to native
2314 if (!is_critical_native) {
2315 __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2316 }
2317
2318 // Use that pc we placed in O7 a while back as the current frame anchor
2319 __ set_last_Java_frame(SP, O7);
2320
2321 // We flushed the windows ages ago; now mark them as flushed before transitioning.
2322 __ set(JavaFrameAnchor::flushed, G3_scratch);
2323 __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2324
2325 // Transition from _thread_in_Java to _thread_in_native.
2326 __ set(_thread_in_native, G3_scratch);
2327
2328 AddressLiteral dest(native_func);
2329 __ relocate(relocInfo::runtime_call_type);
2330 __ jumpl_to(dest, O7, O7);
2331 __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
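// Note that the state store sits in the jumpl's delay slot, so roughly
//
//   thread->_thread_state = _thread_in_native;   // then enter native_func
//
// the thread is observably in native state the instant control reaches
// the native function.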
2332
2333 __ restore_thread(L7_thread_cache); // restore G2_thread
2334
2335 // Unpack native results. For int-types, we do any needed sign-extension
2336 // and move things into I0. The return value there will survive any VM
2337 // calls for blocking or unlocking. An FP or OOP result (handle) is done
2338 // specially in the slow-path code.
2339 switch (ret_type) {
2340 case T_VOID: break; // Nothing to do!
2341 case T_FLOAT: break; // Got it where we want it (unless slow-path)
2342 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
2343 // In a 64-bit build the result is in O0; in a 32-bit build it is in O0,O1.
2344 case T_LONG:
2345 // Fall thru
2346 case T_OBJECT: // Really a handle; cannot de-handlize until after reclaiming jvm_lock
2347 case T_ARRAY:
2348 case T_INT:
2349 __ mov(O0, I0);
2350 break;
2351 case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2352 case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break;
2353 case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value!
2354 case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break;
2356 default:
2357 ShouldNotReachHere();
2358 }
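// The shift pairs above are the usual SPARC narrowing idioms; in C terms,
// roughly:
//
//   T_BOOLEAN: I0 = (O0 != 0);           // subcc sets carry iff O0 != 0,
//                                        // addc then copies the carry
//   T_BYTE:    I0 = (jint)(jbyte) O0;    // sll 24 / sra 24 sign-extends
//   T_CHAR:    I0 = (jint)(jchar) O0;    // sll 16 / srl 16 zero-extends
//   T_SHORT:   I0 = (jint)(jshort)O0;    // sll 16 / sra 16 sign-extends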
2359
2360 Label after_transition;
2361 // must we block?
2362
2363 // Block, if necessary, before resuming in _thread_in_Java state.
2364 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2530 __ verify_oop(I0);
2531 }
2532
2533 if (CheckJNICalls) {
2534 // clear_pending_jni_exception_check
2535 __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset());
2536 }
2537
2538 if (!is_critical_native) {
2539 // reset handle block
2540 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
2541 __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
2542
2543 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
2544 check_forward_pending_exception(masm, G3_scratch);
2545 }
2546
2547
2548 // Return
2549
2550 __ ret();
2551 __ delayed()->restore();
2552
2553 __ flush();
2554
2555 nmethod *nm = nmethod::new_native_nmethod(method,
2556 compile_id,
2557 masm->code(),
2558 vep_offset,
2559 frame_complete,
2560 stack_slots / VMRegImpl::slots_per_word,
2561 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2562 in_ByteSize(lock_offset),
2563 oop_maps);
2564
2565 if (is_critical_native) {
2566 nm->set_lazy_critical_native(true);
2567 }
2568 return nm;
2569
2591
2592 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
2593 //
2594 // Common out the new frame generation for deopt and uncommon trap
2595 //
2596 Register G3pcs = G3_scratch; // Array of new pcs (input)
2597 Register Oreturn0 = O0;
2598 Register Oreturn1 = O1;
2599 Register O2UnrollBlock = O2;
2600 Register O3array = O3; // Array of frame sizes (input)
2601 Register O4array_size = O4; // number of frames (input)
2602 Register O7frame_size = O7; // size of each frame (scratch)
2603
2604 __ ld_ptr(O3array, 0, O7frame_size);
2605 __ sub(G0, O7frame_size, O7frame_size);
2606 __ save(SP, O7frame_size, SP);
2607 __ ld_ptr(G3pcs, 0, I7); // load frame's new pc
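// Sketch of the frame push above: 'save' adds its second operand to SP, so
// negating the size first makes this roughly
//
//   new_SP = old_SP - frame_size;   // plus the register-window rotation
//
// and seeding I7 means the new frame will "return" to its replacement pc.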
2608
2609 #ifdef ASSERT
2610 // make sure that the frames are aligned properly
2611 #endif
2612
2613 // Deopt needs to pass some extra live values from frame to frame
2614
2615 if (deopt) {
2616 __ mov(Oreturn0->after_save(), Oreturn0);
2617 __ mov(Oreturn1->after_save(), Oreturn1);
2618 }
2619
2620 __ mov(O4array_size->after_save(), O4array_size);
2621 __ sub(O4array_size, 1, O4array_size);
2622 __ mov(O3array->after_save(), O3array);
2623 __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
2624 __ add(G3pcs, wordSize, G3pcs); // point to next pc value
2625
2626 #ifdef ASSERT
2627 // trash registers to show a clear pattern in backtraces
2628 __ set(0xDEAD0000, I0);
2629 __ add(I0, 2, I1);
2630 __ add(I0, 4, I2);
2708 }
2709
2710 //------------------------------generate_deopt_blob----------------------------
2711 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2712 // instead.
2713 void SharedRuntime::generate_deopt_blob() {
2714 // allocate space for the code
2715 ResourceMark rm;
2716 // setup code generation tools
2717 int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
2718 #ifdef ASSERT
2719 if (UseStackBanging) {
2720 pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2721 }
2722 #endif
2723 #if INCLUDE_JVMCI
2724 if (EnableJVMCI) {
2725 pad += 1000; // Increase the buffer size when compiling for JVMCI
2726 }
2727 #endif
2728 CodeBuffer buffer("deopt_blob", 2100+pad, 512);
2729 MacroAssembler* masm = new MacroAssembler(&buffer);
2730 FloatRegister Freturn0 = F0;
2731 Register Greturn1 = G1;
2732 Register Oreturn0 = O0;
2733 Register Oreturn1 = O1;
2734 Register O2UnrollBlock = O2;
2735 Register L0deopt_mode = L0;
2736 Register G4deopt_mode = G4_scratch;
2737 int frame_size_words;
2738 Address saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
2739 Label cont;
2740
2741 OopMapSet *oop_maps = new OopMapSet();
2742
2743 //
2744 // This is the entry point for code which is returning to a de-optimized
2745 // frame.
2746 // The steps taken by this frame are as follows:
2747 // - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
2748 // and all potentially live registers (at a pollpoint many registers can be live).
2749 //
2750 // - call the C routine: Deoptimization::fetch_unroll_info (this function
2751 // returns information about the number and size of interpreter frames
2752 // which are equivalent to the frame which is being deoptimized)
2753 // - deallocate the unpack frame, restoring only result values. Other
2754 // volatile registers will now be captured in the vframeArray as needed.
2755 // - deallocate the deoptimization frame
2756 // - in a loop using the information returned in the previous step
2757 // push new interpreter frames (take care to propagate the return
2758 // values through each new frame pushed)
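// In pseudo-code, the loop sketched in the last step is roughly:
//
//   for (i = 0; i < number_of_frames; i++) {
//     push an interpreter frame of frame_sizes[i];
//     seed its return pc from pcs[i];
//     keep O0/O1 and F0 (the return values) live across the push;
//   }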
2930 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
2931 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
2932 __ bind(noException);
2933
2934 // deallocate the deoptimization frame taking care to preserve the return values
2935 __ mov(Oreturn0, Oreturn0->after_save());
2936 __ mov(Oreturn1, Oreturn1->after_save());
2937 __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
2938 __ restore();
2939
2940 // Allocate new interpreter frame(s) and possible c2i adapter frame
2941
2942 make_new_frames(masm, true);
2943
2944 // push a dummy "unpack_frame", taking care of float return values, and
2945 // call Deoptimization::unpack_frames to have the unpacker lay out
2946 // the information in the interpreter frames just created; then return
2947 // to the interpreter entry point.
2948 __ save(SP, -frame_size_words*wordSize, SP);
2949 __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
2950 // LP64 uses g4 in set_last_Java_frame
2951 __ mov(G4deopt_mode, O1);
2952 __ set_last_Java_frame(SP, G0);
2953 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
2954 __ reset_last_Java_frame();
2955 __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
2956
2957 __ ret();
2958 __ delayed()->restore();
2959
2960 masm->flush();
2961 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
2962 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2963 #if INCLUDE_JVMCI
2964 if (EnableJVMCI) {
2965 _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2966 _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2967 }
2968 #endif
2969 }
2970
2971 #ifdef COMPILER2
2972
2973 //------------------------------generate_uncommon_trap_blob--------------------
2974 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2975 // instead.
2976 void SharedRuntime::generate_uncommon_trap_blob() {
2977 // allocate space for the code
2978 ResourceMark rm;
2979 // setup code generation tools
2980 int pad = VerifyThread ? 512 : 0;
2981 #ifdef ASSERT
2982 if (UseStackBanging) {
2983 pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2984 }
2985 #endif
2986 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2987 MacroAssembler* masm = new MacroAssembler(&buffer);
2988 Register O2UnrollBlock = O2;
2989 Register O2klass_index = O2;
2990
2991 //
2992 // This is the entry point for all traps the compiler takes when it thinks
2993 // it cannot handle further execution of compiled code. The frame is
2994 // deoptimized in these cases and converted into interpreter frames for
2995 // execution.
2996 // The steps taken by this frame are as follows:
2997 // - push a fake "unpack_frame"
2998 // - call the C routine Deoptimization::uncommon_trap (this function
2999 // packs the current compiled frame into vframe arrays and returns
3000 // information about the number and size of interpreter frames which
3001 // are equivalent to the frame which is being deoptimized)
3002 // - deallocate the "unpack_frame"
3003 // - deallocate the deoptimization frame
3004 // - in a loop using the information returned in the previous step
3005 // push interpreter frames;
3006 // - create a dummy "unpack_frame"