src/cpu/sparc/vm/sharedRuntime_sparc.cpp

   1 /*
   2  * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 110 
 111   static void restore_result_registers(MacroAssembler* masm);
 112 };
 113 
 114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
 115   // Record volatile registers as callee-save values in an OopMap so their save locations will be
 116   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
 117   // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
 118   // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
 119   // (as the stub's I's) when the runtime routine called by the stub creates its frame.
 120   int i;
 121   // Always make the frame size 16 byte aligned.
 122   int frame_size = round_to(additional_frame_words + register_save_size, 16);
 123   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
 124   int frame_size_in_slots = frame_size / sizeof(jint);
 125   // CodeBlob frame size is in words.
 126   *total_frame_words = frame_size / wordSize;
 127   // OopMap* map = new OopMap(*total_frame_words, 0);
 128   OopMap* map = new OopMap(frame_size_in_slots, 0);
 129 
 130 #if !defined(_LP64)
 131 
 132   // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
 133   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
 134   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
 135   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
 136   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
 137   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
 138   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
 139 #endif /* _LP64 */
 140 
 141   __ save(SP, -frame_size, SP);
 142 
 143 #ifndef _LP64
 144   // Reload the 64-bit O regs. Although they are now I regs, we load them
 145   // into O regs here to avoid interrupts cutting off their heads.
 146 
 147   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
 148   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
 149   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
 150   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
 151   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
 152   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
 153 
 154   __ stx(O0, SP, o0_offset+STACK_BIAS);
 155   map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());
 156 
 157   __ stx(O1, SP, o1_offset+STACK_BIAS);
 158 
 159   map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());
 160 
 161   __ stx(O2, SP, o2_offset+STACK_BIAS);
 162   map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());
 163 
 164   __ stx(O3, SP, o3_offset+STACK_BIAS);
 165   map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());
 166 
 167   __ stx(O4, SP, o4_offset+STACK_BIAS);
 168   map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());
 169 
 170   __ stx(O5, SP, o5_offset+STACK_BIAS);
 171   map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
 172 #endif /* _LP64 */
 173 
 174 
 175 #ifdef _LP64
 176   int debug_offset = 0;
 177 #else
 178   int debug_offset = 4;
 179 #endif
 180   // Save the G's
 181   __ stx(G1, SP, g1_offset+STACK_BIAS);
 182   map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
 183 
 184   __ stx(G3, SP, g3_offset+STACK_BIAS);
 185   map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
 186 
 187   __ stx(G4, SP, g4_offset+STACK_BIAS);
 188   map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
 189 
 190   __ stx(G5, SP, g5_offset+STACK_BIAS);
 191   map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
 192 
 193   // This is really a waste but we'll keep things as they were for now
 194   if (true) {
 195 #ifndef _LP64
 196     map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
 197     map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
 198     map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
 199     map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
 200     map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
 201     map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
 202     map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
 203     map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
 204     map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
 205     map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
 206 #endif /* _LP64 */
 207   }
 208 
 209 
 210   // Save the flags
 211   __ rdccr( G5 );
 212   __ stx(G5, SP, ccr_offset+STACK_BIAS);
 213   __ stxfsr(SP, fsr_offset+STACK_BIAS);
 214 
 215   // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
 216   int offset = d00_offset;
 217   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
 218     FloatRegister f = as_FloatRegister(i);
 219     __ stf(FloatRegisterImpl::D,  f, SP, offset+STACK_BIAS);
 220     // Record as callee saved both halves of double registers (2 float registers).
 221     map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
 222     map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
 223     offset += sizeof(double);
 224   }
 225 
 226   // And we're done.
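
Note on the size bookkeeping at the top of save_live_registers: the same byte count is expressed in three units (bytes, 32-bit OopMap slots, and machine words). A minimal standalone sketch of that arithmetic follows; the register_save_size value is hypothetical and the 64-bit word size is an assumption, so this only illustrates the unit conversions, not the VM code itself.

    #include <cstdint>
    #include <cstdio>

    // Round x up to a multiple of align (a power of two), like round_to() in the VM.
    static int round_up(int x, int align) {
      return (x + align - 1) & ~(align - 1);
    }

    int main() {
      const int wordSize           = 8;    // assumption: 64-bit build
      const int register_save_size = 328;  // hypothetical byte count, for illustration only

      int frame_size          = round_up(register_save_size, 16);   // keep the frame 16-byte aligned
      int frame_size_in_slots = frame_size / (int)sizeof(int32_t);  // OopMap counts 32-bit stack slots
      int total_frame_words   = frame_size / wordSize;              // CodeBlob counts machine words

      printf("bytes=%d slots=%d words=%d\n", frame_size, frame_size_in_slots, total_frame_words);
      return 0;
    }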


 233 // saved.
 234 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
 235 
 236   // Restore all the FP registers
 237   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
 238     __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
 239   }
 240 
 241   __ ldx(SP, ccr_offset+STACK_BIAS, G1);
 242   __ wrccr (G1) ;
 243 
 244   // Restore the G's
 245   // Note that G2 (AKA GThread) must be saved and restored separately.
 246   // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
 247 
 248   __ ldx(SP, g1_offset+STACK_BIAS, G1);
 249   __ ldx(SP, g3_offset+STACK_BIAS, G3);
 250   __ ldx(SP, g4_offset+STACK_BIAS, G4);
 251   __ ldx(SP, g5_offset+STACK_BIAS, G5);
 252 
 253 
 254 #if !defined(_LP64)
 255   // Restore the 64-bit O's.
 256   __ ldx(SP, o0_offset+STACK_BIAS, O0);
 257   __ ldx(SP, o1_offset+STACK_BIAS, O1);
 258   __ ldx(SP, o2_offset+STACK_BIAS, O2);
 259   __ ldx(SP, o3_offset+STACK_BIAS, O3);
 260   __ ldx(SP, o4_offset+STACK_BIAS, O4);
 261   __ ldx(SP, o5_offset+STACK_BIAS, O5);
 262 
 263   // And temporarily place them in TLS
 264 
 265   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
 266   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
 267   __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
 268   __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
 269   __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
 270   __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
 271 #endif /* _LP64 */
 272 
 273   // Restore flags
 274 
 275   __ ldxfsr(SP, fsr_offset+STACK_BIAS);
 276 
 277   __ restore();
 278 
 279 #if !defined(_LP64)
 280   // Now reload the 64-bit O regs after we've restored the window.
 281   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
 282   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
 283   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
 284   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
 285   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
 286   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
 287 #endif /* _LP64 */
 288 
 289 }
 290 
 291 // Pop the current frame and restore the registers that might be holding
 292 // a result.
 293 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 294 
 295 #if !defined(_LP64)
 296   // 32bit build returns longs in G1
 297   __ ldx(SP, g1_offset+STACK_BIAS, G1);
 298 
 299   // Retrieve the 64-bit O's.
 300   __ ldx(SP, o0_offset+STACK_BIAS, O0);
 301   __ ldx(SP, o1_offset+STACK_BIAS, O1);
 302   // and save to TLS
 303   __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
 304   __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
 305 #endif /* _LP64 */
 306 
 307   __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
 308 
 309   __ restore();
 310 
 311 #if !defined(_LP64)
 312   // Now reload the 64-bit O regs after we've restored the window.
 313   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
 314   __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
 315 #endif /* _LP64 */
 316 
 317 }
 318 
 319 // Is vector's size (in bytes) bigger than a size saved by default?
 320 // 8 bytes FP registers are saved by default on SPARC.
 321 bool SharedRuntime::is_wide_vector(int size) {
 322   // Note, MaxVectorSize == 8 on SPARC.
 323   assert(size <= 8, "%d bytes vectors are not supported", size);
 324   return size > 8;
 325 }
 326 
 327 size_t SharedRuntime::trampoline_size() {
 328   return 40;
 329 }
 330 
 331 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 332   __ set((intptr_t)destination, G3_scratch);
 333   __ JMP(G3_scratch, 0);
 334   __ delayed()->nop();
 335 }
 336 


 393 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 394                                            VMRegPair *regs,
 395                                            int total_args_passed,
 396                                            int is_outgoing) {
 397   assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
 398 
 399   const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
 400   const int flt_reg_max = 8;
 401 
 402   int int_reg = 0;
 403   int flt_reg = 0;
 404   int slot = 0;
 405 
 406   for (int i = 0; i < total_args_passed; i++) {
 407     switch (sig_bt[i]) {
 408     case T_INT:
 409     case T_SHORT:
 410     case T_CHAR:
 411     case T_BYTE:
 412     case T_BOOLEAN:
 413 #ifndef _LP64
 414     case T_OBJECT:
 415     case T_ARRAY:
 416     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
 417 #endif // _LP64
 418       if (int_reg < int_reg_max) {
 419         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
 420         regs[i].set1(r->as_VMReg());
 421       } else {
 422         regs[i].set1(VMRegImpl::stack2reg(slot++));
 423       }
 424       break;
 425 
 426 #ifdef _LP64
 427     case T_LONG:
 428       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
 429       // fall-through
 430     case T_OBJECT:
 431     case T_ARRAY:
 432     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
 433       if (int_reg < int_reg_max) {
 434         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
 435         regs[i].set2(r->as_VMReg());
 436       } else {
 437         slot = round_to(slot, 2);  // align
 438         regs[i].set2(VMRegImpl::stack2reg(slot));
 439         slot += 2;
 440       }
 441       break;
 442 #else
 443     case T_LONG:
 444       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
 445       // On 32-bit SPARC, always put longs on the stack to keep the pressure off
 446       // the integer argument registers; they should be used for oops.
 447       slot = round_to(slot, 2);  // align
 448       regs[i].set2(VMRegImpl::stack2reg(slot));
 449       slot += 2;
 450 #endif
 451       break;
 452 
 453     case T_FLOAT:
 454       if (flt_reg < flt_reg_max) {
 455         FloatRegister r = as_FloatRegister(flt_reg++);
 456         regs[i].set1(r->as_VMReg());
 457       } else {
 458         regs[i].set1(VMRegImpl::stack2reg(slot++));
 459       }
 460       break;
 461 
 462     case T_DOUBLE:
 463       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 464       if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
 465         flt_reg = round_to(flt_reg, 2);  // align
 466         FloatRegister r = as_FloatRegister(flt_reg);
 467         regs[i].set2(r->as_VMReg());
 468         flt_reg += 2;
 469       } else {
 470         slot = round_to(slot, 2);  // align
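
The stack-slot side of this convention is easiest to see in isolation. Below is a toy model (not the VM code) that assumes every argument spills to the stack: ints take one 32-bit slot, while longs and doubles are first aligned with round_to(slot, 2) and then take an aligned pair of slots. The ToyType names are stand-ins for the BasicType tags.

    #include <cstdio>
    #include <vector>

    enum ToyType { TOY_INT, TOY_LONG, TOY_DOUBLE };   // stand-ins for the BasicType tags

    static int round_up(int x, int align) { return (x + align - 1) & ~(align - 1); }

    int main() {
      std::vector<ToyType> sig = { TOY_INT, TOY_LONG, TOY_INT, TOY_DOUBLE };
      int slot = 0;
      for (ToyType t : sig) {
        if (t == TOY_INT) {
          printf("int    -> slot %d\n", slot);
          slot += 1;                       // one 32-bit slot
        } else {
          slot = round_up(slot, 2);        // align 64-bit values to an even slot
          printf("%s -> slots %d,%d\n", (t == TOY_LONG) ? "long  " : "double", slot, slot + 1);
          slot += 2;                       // two 32-bit slots
        }
      }
      return 0;
    }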


 537 
 538 
 539 // Patch the callers callsite with entry to compiled code if it exists.
 540 void AdapterGenerator::patch_callers_callsite() {
 541   Label L;
 542   __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
 543   __ br_null(G3_scratch, false, Assembler::pt, L);
 544   __ delayed()->nop();
 545   // Call into the VM to patch the caller, then jump to compiled callee
 546   __ save_frame(4);     // Args in compiled layout; do not blow them
 547 
 548   // Must save all the live G regs; the list is:
 549   // G1: 1st Long arg (32bit build)
 550   // G2: global allocated to TLS
 551   // G3: used in inline cache check (scratch)
 552   // G4: 2nd Long arg (32bit build);
 553   // G5: used in inline cache check (Method*)
 554 
 555   // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
 556 
 557 #ifdef _LP64
 558   // mov(s,d)
 559   __ mov(G1, L1);
 560   __ mov(G4, L4);
 561   __ mov(G5_method, L5);
 562   __ mov(G5_method, O0);         // VM needs target method
 563   __ mov(I7, O1);                // VM needs caller's callsite
 564   // Must be a leaf call...
 565   // can be very far once the blob has been relocated
 566   AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 567   __ relocate(relocInfo::runtime_call_type);
 568   __ jumpl_to(dest, O7, O7);
 569   __ delayed()->mov(G2_thread, L7_thread_cache);
 570   __ mov(L7_thread_cache, G2_thread);
 571   __ mov(L1, G1);
 572   __ mov(L4, G4);
 573   __ mov(L5, G5_method);
 574 #else
 575   __ stx(G1, FP, -8 + STACK_BIAS);
 576   __ stx(G4, FP, -16 + STACK_BIAS);
 577   __ mov(G5_method, L5);
 578   __ mov(G5_method, O0);         // VM needs target method
 579   __ mov(I7, O1);                // VM needs caller's callsite
 580   // Must be a leaf call...
 581   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
 582   __ delayed()->mov(G2_thread, L7_thread_cache);
 583   __ mov(L7_thread_cache, G2_thread);
 584   __ ldx(FP, -8 + STACK_BIAS, G1);
 585   __ ldx(FP, -16 + STACK_BIAS, G4);
 586   __ mov(L5, G5_method);
 587 #endif /* _LP64 */
 588 
 589   __ restore();      // Restore args
 590   __ bind(L);
 591 }
 592 
 593 
 594 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
 595   RegisterOrConstant roc(arg_offset(st_off));
 596   return __ ensure_simm13_or_reg(roc, Rdisp);
 597 }
 598 
 599 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
 600   RegisterOrConstant roc(next_arg_offset(st_off));
 601   return __ ensure_simm13_or_reg(roc, Rdisp);
 602 }
 603 
 604 
 605 // Stores long into offset pointed to by base
 606 void AdapterGenerator::store_c2i_long(Register r, Register base,
 607                                       const int st_off, bool is_stack) {
 608 #ifdef _LP64
 609   // In V9, longs are given 2 64-bit slots in the interpreter, but the
 610   // data is passed in only 1 slot.
 611   __ stx(r, base, next_arg_slot(st_off));
 612 #else
 613 #ifdef COMPILER2
 614   // Misaligned store of 64-bit data
 615   __ stw(r, base, arg_slot(st_off));    // lo bits
 616   __ srlx(r, 32, r);
 617   __ stw(r, base, next_arg_slot(st_off));  // hi bits
 618 #else
 619   if (is_stack) {
 620     // Misaligned store of 64-bit data
 621     __ stw(r, base, arg_slot(st_off));    // lo bits
 622     __ srlx(r, 32, r);
 623     __ stw(r, base, next_arg_slot(st_off));  // hi bits
 624   } else {
 625     __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
 626     __ stw(r             , base, next_arg_slot(st_off)); // hi bits
 627   }
 628 #endif // COMPILER2
 629 #endif // _LP64
 630 }
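
For the 32-bit path above, the stw/srlx/stw sequence stores a 64-bit register as two separate 32-bit words. A plain C++ rendering of that split is below; it is illustrative only, since the real code also has to pick interpreter slots via arg_slot/next_arg_slot.

    #include <cstdint>
    #include <cstdio>

    // Store a 64-bit value as two 32-bit words, low half first, high half second,
    // mirroring:  stw r,[lo]; srlx r,32,r; stw r,[hi]
    static void store_long_as_two_words(uint64_t value, uint32_t* lo_slot, uint32_t* hi_slot) {
      *lo_slot = (uint32_t)value;           // low 32 bits
      *hi_slot = (uint32_t)(value >> 32);   // high 32 bits
    }

    int main() {
      uint32_t slots[2] = {0, 0};
      store_long_as_two_words(0x1122334455667788ULL, &slots[0], &slots[1]);
      printf("lo=0x%08x hi=0x%08x\n", (unsigned)slots[0], (unsigned)slots[1]);  // lo=0x55667788 hi=0x11223344
      return 0;
    }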
 631 
 632 void AdapterGenerator::store_c2i_object(Register r, Register base,
 633                       const int st_off) {
 634   __ st_ptr (r, base, arg_slot(st_off));
 635 }
 636 
 637 void AdapterGenerator::store_c2i_int(Register r, Register base,
 638                    const int st_off) {
 639   __ st (r, base, arg_slot(st_off));
 640 }
 641 
 642 // Stores into offset pointed to by base
 643 void AdapterGenerator::store_c2i_double(VMReg r_2,
 644                       VMReg r_1, Register base, const int st_off) {
 645 #ifdef _LP64
 646   // In V9, doubles are given 2 64-bit slots in the interpreter, but the
 647   // data is passed in only 1 slot.
 648   __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
 649 #else
 650   // Need to marshal 64-bit value from misaligned Lesp loads
 651   __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
 652   __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
 653 #endif
 654 }
 655 
 656 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
 657                                        const int st_off) {
 658   __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
 659 }
 660 
 661 void AdapterGenerator::gen_c2i_adapter(
 662                             int total_args_passed,
 663                             // VMReg max_arg,
 664                             int comp_args_on_stack, // VMRegStackSlots
 665                             const BasicType *sig_bt,
 666                             const VMRegPair *regs,
 667                             Label& L_skip_fixup) {
 668 
 669   // Before we get into the guts of the C2I adapter, see if we should be here
 670   // at all.  We've come from compiled code and are attempting to jump to the
 671   // interpreter, which means the caller made a static call to get here
 672   // (vcalls always get a compiled target if there is one).  Check for a
 673   // compiled target.  If there is one, we need to patch the caller's call.


 940 
 941     // Load in argument order going down.
 942     const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
 943     set_Rdisp(G1_scratch);
 944 
 945     VMReg r_1 = regs[i].first();
 946     VMReg r_2 = regs[i].second();
 947     if (!r_1->is_valid()) {
 948       assert(!r_2->is_valid(), "");
 949       continue;
 950     }
 951     if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
 952       r_1 = F8->as_VMReg();        // as part of the load/store shuffle
 953       if (r_2->is_valid()) r_2 = r_1->next();
 954     }
 955     if (r_1->is_Register()) {  // Register argument
 956       Register r = r_1->as_Register()->after_restore();
 957       if (!r_2->is_valid()) {
 958         __ ld(Gargs, arg_slot(ld_off), r);
 959       } else {
 960 #ifdef _LP64
 961         // In V9, longs are given 2 64-bit slots in the interpreter, but the
 962         // data is passed in only 1 slot.
 963         RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
 964               next_arg_slot(ld_off) : arg_slot(ld_off);
 965         __ ldx(Gargs, slot, r);
 966 #else
 967         fatal("longs should be on stack");
 968 #endif
 969       }
 970     } else {
 971       assert(r_1->is_FloatRegister(), "");
 972       if (!r_2->is_valid()) {
 973         __ ldf(FloatRegisterImpl::S, Gargs,      arg_slot(ld_off), r_1->as_FloatRegister());
 974       } else {
 975 #ifdef _LP64
 976         // In V9, doubles are given 2 64-bit slots in the interpreter, but the
 977         // data is passed in only 1 slot.  This code also handles longs that
 978         // are passed on the stack, but need a stack-to-stack move through a
 979         // spare float register.
 980         RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
 981               next_arg_slot(ld_off) : arg_slot(ld_off);
 982         __ ldf(FloatRegisterImpl::D, Gargs,                  slot, r_1->as_FloatRegister());
 983 #else
 984         // Need to marshal 64-bit value from misaligned Lesp loads
 985         __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
 986         __ ldf(FloatRegisterImpl::S, Gargs,      arg_slot(ld_off), r_2->as_FloatRegister());
 987 #endif
 988       }
 989     }
 990     // Was the argument really intended to be on the stack, but was loaded
 991     // into F8/F9?
 992     if (regs[i].first()->is_stack()) {
 993       assert(r_1->as_FloatRegister() == F8, "fix this code");
 994       // Convert stack slot to an SP offset
 995       int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
 996       // Store down the shuffled stack word.  Target address _is_ aligned.
 997       RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
 998       if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
 999       else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
1000     }
1001   }
1002 
1003   // Jump to the compiled code just as if compiled code was doing it.
1004   __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
1005 #if INCLUDE_JVMCI
1006   if (EnableJVMCI) {
1007     // check if this call should be routed towards a specific entry point


1140 
1141 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1142                                          VMRegPair *regs,
1143                                          VMRegPair *regs2,
1144                                          int total_args_passed) {
1145     assert(regs2 == NULL, "not needed on sparc");
1146 
1147     // Return the number of VMReg stack_slots needed for the args.
1148     // This value does not include ABI space (like the register window
1149     // save area).
1150 
1151     // The native convention is V8 if !LP64
1152     // The LP64 convention is the V9 convention which is slightly more sane.
1153 
1154     // We return the amount of VMReg stack slots we need to reserve for all
1155     // the arguments NOT counting out_preserve_stack_slots. Since we always
1156     // have space for storing at least 6 registers to memory we start with that.
1157     // See int_stk_helper for a further discussion.
1158     int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
1159 
1160 #ifdef _LP64
1161     // V9 convention: All things "as-if" on double-wide stack slots.
1162     // Hoist any int/ptr/long's in the first 6 to int regs.
1163     // Hoist any flt/dbl's in the first 16 dbl regs.
1164     int j = 0;                  // Count of actual args, not HALVES
1165     VMRegPair param_array_reg;  // location of the argument in the parameter array
1166     for (int i = 0; i < total_args_passed; i++, j++) {
1167       param_array_reg.set_bad();
1168       switch (sig_bt[i]) {
1169       case T_BOOLEAN:
1170       case T_BYTE:
1171       case T_CHAR:
1172       case T_INT:
1173       case T_SHORT:
1174         regs[i].set1(int_stk_helper(j));
1175         break;
1176       case T_LONG:
1177         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
1178       case T_ADDRESS: // raw pointers, like current thread, for VM calls
1179       case T_ARRAY:
1180       case T_OBJECT:


1224       case T_VOID:
1225         regs[i].set_bad();
1226         j--;
1227         break; // Do not count HALVES
1228       default:
1229         ShouldNotReachHere();
1230       }
1231       // Keep track of the deepest parameter array slot.
1232       if (!param_array_reg.first()->is_valid()) {
1233         param_array_reg = regs[i];
1234       }
1235       if (param_array_reg.first()->is_stack()) {
1236         int off = param_array_reg.first()->reg2stack();
1237         if (off > max_stack_slots) max_stack_slots = off;
1238       }
1239       if (param_array_reg.second()->is_stack()) {
1240         int off = param_array_reg.second()->reg2stack();
1241         if (off > max_stack_slots) max_stack_slots = off;
1242       }
1243     }
1244 
1245 #else // _LP64
1246     // V8 convention: first 6 things in O-regs, rest on stack.
1247     // Alignment is willy-nilly.
1248     for (int i = 0; i < total_args_passed; i++) {
1249       switch (sig_bt[i]) {
1250       case T_ADDRESS: // raw pointers, like current thread, for VM calls
1251       case T_ARRAY:
1252       case T_BOOLEAN:
1253       case T_BYTE:
1254       case T_CHAR:
1255       case T_FLOAT:
1256       case T_INT:
1257       case T_OBJECT:
1258       case T_METADATA:
1259       case T_SHORT:
1260         regs[i].set1(int_stk_helper(i));
1261         break;
1262       case T_DOUBLE:
1263       case T_LONG:
1264         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1265         regs[i].set_pair(int_stk_helper(i + 1), int_stk_helper(i));
1266         break;
1267       case T_VOID: regs[i].set_bad(); break;
1268       default:
1269         ShouldNotReachHere();
1270       }
1271       if (regs[i].first()->is_stack()) {
1272         int off = regs[i].first()->reg2stack();
1273         if (off > max_stack_slots) max_stack_slots = off;
1274       }
1275       if (regs[i].second()->is_stack()) {
1276         int off = regs[i].second()->reg2stack();
1277         if (off > max_stack_slots) max_stack_slots = off;
1278       }
1279     }
1280 #endif // _LP64
1281 
1282   return round_to(max_stack_slots + 1, 2);
1283 
1284 }
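
The return value of c_calling_convention is just the deepest stack slot touched by any argument, padded to an even slot count. A compressed model of that bookkeeping follows; the slot indices and the starting floor are hypothetical values for illustration.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    static int round_up(int x, int align) { return (x + align - 1) & ~(align - 1); }

    int main() {
      // Pretend these are the stack-slot indices assigned to spilled arguments.
      std::vector<int> arg_slots = { 12, 13, 14, 17 };   // hypothetical
      int max_stack_slots = 10;                          // hypothetical starting floor

      for (int off : arg_slots) {
        max_stack_slots = std::max(max_stack_slots, off);
      }
      printf("reserve %d slots\n", round_up(max_stack_slots + 1, 2));   // -> 18
      return 0;
    }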
1285 
1286 
1287 // ---------------------------------------------------------------------------
1288 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1289   switch (ret_type) {
1290   case T_FLOAT:
1291     __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1292     break;
1293   case T_DOUBLE:
1294     __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1295     break;
1296   }
1297 }
1298 
1299 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1300   switch (ret_type) {
1301   case T_FLOAT:


1389 }
1390 
1391 
1392 // An oop arg. Must pass a handle not the oop itself
1393 static void object_move(MacroAssembler* masm,
1394                         OopMap* map,
1395                         int oop_handle_offset,
1396                         int framesize_in_slots,
1397                         VMRegPair src,
1398                         VMRegPair dst,
1399                         bool is_receiver,
1400                         int* receiver_offset) {
1401 
1402   // must pass a handle. First figure out the location we use as a handle
1403 
1404   if (src.first()->is_stack()) {
1405     // Oop is already on the stack
1406     Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1407     __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1408     __ ld_ptr(rHandle, 0, L4);
1409 #ifdef _LP64
1410     __ movr( Assembler::rc_z, L4, G0, rHandle );
1411 #else
1412     __ tst( L4 );
1413     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1414 #endif
1415     if (dst.first()->is_stack()) {
1416       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1417     }
1418     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1419     if (is_receiver) {
1420       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1421     }
1422     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1423   } else {
1424     // Oop is in an input register; we must flush it to the stack
1425     const Register rOop = src.first()->as_Register();
1426     const Register rHandle = L5;
1427     int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1428     int offset = oop_slot * VMRegImpl::stack_slot_size;
1429     __ st_ptr(rOop, SP, offset + STACK_BIAS);
1430     if (is_receiver) {
1431        *receiver_offset = offset;
1432     }
1433     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1434     __ add(SP, offset + STACK_BIAS, rHandle);
1435 #ifdef _LP64
1436     __ movr( Assembler::rc_z, rOop, G0, rHandle );
1437 #else
1438     __ tst( rOop );
1439     __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1440 #endif
1441 
1442     if (dst.first()->is_stack()) {
1443       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1444     } else {
1445       __ mov(rHandle, dst.first()->as_Register());
1446     }
1447   }
1448 }
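
The conditional moves in object_move implement the JNI handle rule: the handle passed to native code is the address of the stack slot holding the oop, except that a NULL oop must become a NULL handle (that is what the movr(rc_z, ...) / movcc(zero, ...) does). A minimal sketch of that rule, using stand-in types rather than the VM's oop/jobject:

    #include <cstdint>
    #include <cstdio>

    typedef intptr_t  oop_t;       // stand-in for an oop
    typedef oop_t*    handle_t;    // stand-in for a jobject handle

    // Spill the oop to a stack slot and hand out the slot's address,
    // unless the oop is NULL (then the handle itself is NULL).
    static handle_t handleize(oop_t obj, oop_t* stack_slot) {
      *stack_slot = obj;
      return (obj == 0) ? nullptr : stack_slot;
    }

    int main() {
      oop_t slot = 0;
      printf("%p %p\n", (void*)handleize((oop_t)0x1000, &slot), (void*)handleize((oop_t)0, &slot));
      return 0;
    }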
1449 
1450 // A float arg may have to do float reg int reg conversion
1451 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1452   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1453 
1454   if (src.first()->is_stack()) {
1455     if (dst.first()->is_stack()) {
1456       // stack to stack the easiest of the bunch
1457       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1458       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1459     } else {
1460       // stack to reg


2051       __ delayed()->mov(obj_reg, hash);
2052     }
2053 
2054     // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
2055     // We depend on hash_mask being at most 32 bits and avoid the use of
2056     // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
2057     // vm: see markOop.hpp.
2058     __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
2059     __ sethi(markOopDesc::hash_mask, mask);
2060     __ btst(markOopDesc::unlocked_value, header);
2061     __ br(Assembler::zero, false, Assembler::pn, slowCase);
2062     if (UseBiasedLocking) {
2063       // Check if biased and fall through to runtime if so
2064       __ delayed()->nop();
2065       __ btst(markOopDesc::biased_lock_bit_in_place, header);
2066       __ br(Assembler::notZero, false, Assembler::pn, slowCase);
2067     }
2068     __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
2069 
2070     // Check for a valid (non-zero) hash code and get its value.
2071 #ifdef _LP64
2072     __ srlx(header, markOopDesc::hash_shift, hash);
2073 #else
2074     __ srl(header, markOopDesc::hash_shift, hash);
2075 #endif
2076     __ andcc(hash, mask, hash);
2077     __ br(Assembler::equal, false, Assembler::pn, slowCase);
2078     __ delayed()->nop();
2079 
2080     // leaf return.
2081     __ bind(done);
2082     __ retl();
2083     __ delayed()->mov(hash, result);
2084     __ bind(slowCase);
2085   }
2086 #endif // COMPILER1
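
The fast path above boils down to extracting a bitfield from the mark word and bailing out to the slow path when it is zero (no hash installed yet) or when the object is locked or biased. A scalar model with made-up shift/mask constants is shown below; the real constants live in markOop.hpp and differ between 32- and 64-bit builds.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int      hash_shift = 8;           // illustrative only
      const uint64_t hash_mask  = 0x7FFFFFFF;  // illustrative only

      uint64_t mark = (uint64_t)12345 << hash_shift;     // pretend mark word with a hash installed
      uint64_t hash = (mark >> hash_shift) & hash_mask;  // srlx/srl + andcc in the assembly

      if (hash == 0) {
        printf("hash not installed -> take the slow path\n");
      } else {
        printf("fast-path hash = %llu\n", (unsigned long long)hash);
      }
      return 0;
    }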
2087 
2088 
2089   // We have received a description of where all the java args are located
2090   // on entry to the wrapper. We need to convert these args to where
2091   // the jni function will expect them. To figure out where they go
2092   // we convert the java signature to a C signature by inserting
2093   // the hidden arguments as arg[0] and possibly arg[1] (static method)
2094 
2095   const int total_in_args = method->size_of_parameters();


2391   if (method->is_static() && !is_critical_native) {
2392     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1);
2393 
2394     // Now handlize the static class mirror in O1.  It's known not-null.
2395     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
2396     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2397     __ add(SP, klass_offset + STACK_BIAS, O1);
2398   }
2399 
2400 
2401   const Register L6_handle = L6;
2402 
2403   if (method->is_synchronized()) {
2404     assert(!is_critical_native, "unhandled");
2405     __ mov(O1, L6_handle);
2406   }
2407 
2408   // We have all of the arguments set up at this point. We MUST NOT touch any O regs
2409   // except O6/O7, so if we must call out we must push a new frame. We immediately
2410   // push a new frame and flush the windows.
2411 #ifdef _LP64
2412   intptr_t thepc = (intptr_t) __ pc();
2413   {
2414     address here = __ pc();
2415     // Call the next instruction
2416     __ call(here + 8, relocInfo::none);
2417     __ delayed()->nop();
2418   }
2419 #else
2420   intptr_t thepc = __ load_pc_address(O7, 0);
2421 #endif /* _LP64 */
2422 
2423   // We use the same pc/oopMap repeatedly when we call out
2424   oop_maps->add_gc_map(thepc - start, map);
2425 
2426   // O7 now has the pc loaded that we will use when we finally call to native.
2427 
2428   // Save thread in L7; it crosses a bunch of VM calls below
2429   // Don't use save_thread because it smashes G2 and we merely
2430   // want to save a copy
2431   __ mov(G2_thread, L7_thread_cache);
2432 
2433 
2434   // If we create an inner frame, once is plenty;
2435   // when we create it we must also save G2_thread.
2436   bool inner_frame_created = false;
2437 
2438   // dtrace method entry support
2439   {
2440     SkipIfEqual skip_if(
2441       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);


2536     // either as the flush traps and the current window goes too.
2537     __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2538     __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2539   }
2540 
2541   // get JNIEnv* which is first argument to native
2542   if (!is_critical_native) {
2543     __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2544   }
2545 
2546   // Use that pc we placed in O7 a while back as the current frame anchor
2547   __ set_last_Java_frame(SP, O7);
2548 
2549   // We flushed the windows ages ago; now mark them as flushed before transitioning.
2550   __ set(JavaFrameAnchor::flushed, G3_scratch);
2551   __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2552 
2553   // Transition from _thread_in_Java to _thread_in_native.
2554   __ set(_thread_in_native, G3_scratch);
2555 
2556 #ifdef _LP64
2557   AddressLiteral dest(native_func);
2558   __ relocate(relocInfo::runtime_call_type);
2559   __ jumpl_to(dest, O7, O7);
2560 #else
2561   __ call(native_func, relocInfo::runtime_call_type);
2562 #endif
2563   __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2564 
2565   __ restore_thread(L7_thread_cache); // restore G2_thread
2566 
2567   // Unpack native results.  For int-types, we do any needed sign-extension
2568   // and move things into I0.  The return value there will survive any VM
2569   // calls for blocking or unlocking.  An FP or OOP result (handle) is done
2570   // specially in the slow-path code.
2571   switch (ret_type) {
2572   case T_VOID:    break;        // Nothing to do!
2573   case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
2574   case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
2575   // In the 64-bit build the result is in O0; in the 32-bit build it is in O0,O1
2576   case T_LONG:
2577 #ifndef _LP64
2578                   __ mov(O1, I1);
2579 #endif
2580                   // Fall thru
2581   case T_OBJECT:                // Really a handle
2582   case T_ARRAY:
2583   case T_INT:
2584                   __ mov(O0, I0);
2585                   break;
2586   case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2587   case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
2588   case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
2589   case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
2590     break;                      // Cannot de-handlize until after reclaiming jvm_lock
2591   default:
2592     ShouldNotReachHere();
2593   }
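
The shift pairs in the switch above are the usual idiom for normalizing a sub-word JNI result held in a full-width register: shift left to the top of the word, then shift back down arithmetically (sign extend) or logically (zero extend). An equivalent C++ rendering, expressed with narrowing casts for clarity:

    #include <cstdint>
    #include <cstdio>

    static int32_t extend_byte (int32_t x) { return (int8_t)x;   }  // sll 24; sra 24
    static int32_t extend_short(int32_t x) { return (int16_t)x;  }  // sll 16; sra 16
    static int32_t extend_char (int32_t x) { return (uint16_t)x; }  // sll 16; srl 16
    static int32_t to_boolean  (int32_t x) { return x != 0;      }  // subcc/addc: nonzero -> 1

    int main() {
      printf("%d %d %d %d\n",
             extend_byte(0x1FF),      // -> -1
             extend_short(0x1FFFF),   // -> -1
             extend_char(0x1FFFF),    // -> 65535
             to_boolean(42));         // -> 1
      return 0;
    }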
2594 
2595   Label after_transition;
2596   // must we block?
2597 
2598   // Block, if necessary, before resuming in _thread_in_Java state.
2599   // In order for GC to work, don't clear the last_Java_sp until after blocking.


2765       __ verify_oop(I0);
2766   }
2767 
2768   if (CheckJNICalls) {
2769     // clear_pending_jni_exception_check
2770     __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset());
2771   }
2772 
2773   if (!is_critical_native) {
2774     // reset handle block
2775     __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
2776     __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
2777 
2778     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
2779     check_forward_pending_exception(masm, G3_scratch);
2780   }
2781 
2782 
2783   // Return
2784 
2785 #ifndef _LP64
2786   if (ret_type == T_LONG) {
2787 
2788     // Must leave proper result in O0,O1 and G1 (c2/tiered only)
2789     __ sllx(I0, 32, G1);          // Shift bits into high G1
2790     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
2791     __ or3 (I1, G1, G1);          // OR 64 bits into G1
2792   }
2793 #endif
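
The sllx/srl/or3 sequence above rebuilds a single 64-bit value from the two 32-bit halves that the 32-bit convention keeps in I0 (high) and I1 (low), so that, per the comment, c2/tiered callers can also pick the long result up from G1. The same operation in C++:

    #include <cstdint>
    #include <cstdio>

    // G1 = (I0 << 32) | zero_extend(I1)
    static uint64_t merge_long_halves(uint32_t hi, uint32_t lo) {
      return ((uint64_t)hi << 32) | (uint64_t)lo;
    }

    int main() {
      printf("0x%016llx\n", (unsigned long long)merge_long_halves(0x11223344u, 0x55667788u));
      return 0;
    }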
2794 
2795   __ ret();
2796   __ delayed()->restore();
2797 
2798   __ flush();
2799 
2800   nmethod *nm = nmethod::new_native_nmethod(method,
2801                                             compile_id,
2802                                             masm->code(),
2803                                             vep_offset,
2804                                             frame_complete,
2805                                             stack_slots / VMRegImpl::slots_per_word,
2806                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2807                                             in_ByteSize(lock_offset),
2808                                             oop_maps);
2809 
2810   if (is_critical_native) {
2811     nm->set_lazy_critical_native(true);
2812   }
2813   return nm;
2814 


2836 
2837 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
2838 //
2839 // Common out the new frame generation for deopt and uncommon trap
2840 //
2841   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
2842   Register        Oreturn0           = O0;
2843   Register        Oreturn1           = O1;
2844   Register        O2UnrollBlock      = O2;
2845   Register        O3array            = O3;         // Array of frame sizes (input)
2846   Register        O4array_size       = O4;         // number of frames (input)
2847   Register        O7frame_size       = O7;         // number of frames (input)
2848 
2849   __ ld_ptr(O3array, 0, O7frame_size);
2850   __ sub(G0, O7frame_size, O7frame_size);
2851   __ save(SP, O7frame_size, SP);
2852   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
2853 
2854   #ifdef ASSERT
2855   // make sure that the frames are aligned properly
2856 #ifndef _LP64
2857   __ btst(wordSize*2-1, SP);
2858   __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
2859 #endif
2860   #endif
2861 
2862   // Deopt needs to pass some extra live values from frame to frame
2863 
2864   if (deopt) {
2865     __ mov(Oreturn0->after_save(), Oreturn0);
2866     __ mov(Oreturn1->after_save(), Oreturn1);
2867   }
2868 
2869   __ mov(O4array_size->after_save(), O4array_size);
2870   __ sub(O4array_size, 1, O4array_size);
2871   __ mov(O3array->after_save(), O3array);
2872   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
2873   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
2874 
2875   #ifdef ASSERT
2876   // trash registers to show a clear pattern in backtraces
2877   __ set(0xDEAD0000, I0);
2878   __ add(I0,  2, I1);
2879   __ add(I0,  4, I2);


2957 }
2958 
2959 //------------------------------generate_deopt_blob----------------------------
2960 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2961 // instead.
2962 void SharedRuntime::generate_deopt_blob() {
2963   // allocate space for the code
2964   ResourceMark rm;
2965   // setup code generation tools
2966   int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
2967 #ifdef ASSERT
2968   if (UseStackBanging) {
2969     pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2970   }
2971 #endif
2972 #if INCLUDE_JVMCI
2973   if (EnableJVMCI) {
2974     pad += 1000; // Increase the buffer size when compiling for JVMCI
2975   }
2976 #endif
2977 #ifdef _LP64
2978   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
2979 #else
2980   // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
2981   // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
2982   CodeBuffer buffer("deopt_blob", 1600+pad, 512);
2983 #endif /* _LP64 */
2984   MacroAssembler* masm               = new MacroAssembler(&buffer);
2985   FloatRegister   Freturn0           = F0;
2986   Register        Greturn1           = G1;
2987   Register        Oreturn0           = O0;
2988   Register        Oreturn1           = O1;
2989   Register        O2UnrollBlock      = O2;
2990   Register        L0deopt_mode       = L0;
2991   Register        G4deopt_mode       = G4_scratch;
2992   int             frame_size_words;
2993   Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
2994 #if !defined(_LP64) && defined(COMPILER2)
2995   Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
2996 #endif
2997   Label           cont;
2998 
2999   OopMapSet *oop_maps = new OopMapSet();
3000 
3001   //
3002   // This is the entry point for code which is returning to a de-optimized
3003   // frame.
3004   // The steps taken by this frame are as follows:
3005   //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
3006   //     and all potentially live registers (at a pollpoint many registers can be live).
3007   //
3008   //   - call the C routine: Deoptimization::fetch_unroll_info (this function
3009   //     returns information about the number and size of interpreter frames
3010   //     which are equivalent to the frame which is being deoptimized)
3011   //   - deallocate the unpack frame, restoring only result values. Other
3012   //     volatile registers will now be captured in the vframeArray as needed.
3013   //   - deallocate the deoptimization frame
3014   //   - in a loop using the information returned in the previous step
3015   //     push new interpreter frames (take care to propagate the return
3016   //     values through each new frame pushed)


3188   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
3189   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
3190   __ bind(noException);
3191 
3192   // deallocate the deoptimization frame taking care to preserve the return values
3193   __ mov(Oreturn0,     Oreturn0->after_save());
3194   __ mov(Oreturn1,     Oreturn1->after_save());
3195   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
3196   __ restore();
3197 
3198   // Allocate new interpreter frame(s) and possible c2i adapter frame
3199 
3200   make_new_frames(masm, true);
3201 
3202   // push a dummy "unpack_frame" taking care of float return values and
3203   // call Deoptimization::unpack_frames to have the unpacker layout
3204   // information in the interpreter frames just created and then return
3205   // to the interpreter entry point
3206   __ save(SP, -frame_size_words*wordSize, SP);
3207   __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
3208 #if !defined(_LP64)
3209 #if defined(COMPILER2)
3210   // 32-bit 1-register longs return longs in G1
3211   __ stx(Greturn1, saved_Greturn1_addr);
3212 #endif
3213   __ set_last_Java_frame(SP, noreg);
3214   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
3215 #else
3216   // LP64 uses g4 in set_last_Java_frame
3217   __ mov(G4deopt_mode, O1);
3218   __ set_last_Java_frame(SP, G0);
3219   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
3220 #endif
3221   __ reset_last_Java_frame();
3222   __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
3223 
3224 #if !defined(_LP64) && defined(COMPILER2)
3225   // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
3226   // I0/I1 if the return value is long.
3227   Label not_long;
3228   __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long);
3229   __ ldd(saved_Greturn1_addr,I0);
3230   __ bind(not_long);
3231 #endif
3232   __ ret();
3233   __ delayed()->restore();
3234 
3235   masm->flush();
3236   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
3237   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3238 #if INCLUDE_JVMCI
3239   if (EnableJVMCI) {
3240     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3241     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3242   }
3243 #endif
3244 }
3245 
3246 #ifdef COMPILER2
3247 
3248 //------------------------------generate_uncommon_trap_blob--------------------
3249 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
3250 // instead.
3251 void SharedRuntime::generate_uncommon_trap_blob() {
3252   // allocate space for the code
3253   ResourceMark rm;
3254   // setup code generation tools
3255   int pad = VerifyThread ? 512 : 0;
3256 #ifdef ASSERT
3257   if (UseStackBanging) {
3258     pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
3259   }
3260 #endif
3261 #ifdef _LP64
3262   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
3263 #else
3264   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
3265   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
3266   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
3267 #endif
3268   MacroAssembler* masm               = new MacroAssembler(&buffer);
3269   Register        O2UnrollBlock      = O2;
3270   Register        O2klass_index      = O2;
3271 
3272   //
3273   // This is the entry point for all traps the compiler takes when it thinks
3274   // it cannot handle further execution of compilation code. The frame is
3275   // deoptimized in these cases and converted into interpreter frames for
3276   // execution
3277   // The steps taken by this frame are as follows:
3278   //   - push a fake "unpack_frame"
3279   //   - call the C routine Deoptimization::uncommon_trap (this function
3280   //     packs the current compiled frame into vframe arrays and returns
3281   //     information about the number and size of interpreter frames which
3282   //     are equivalent to the frame which is being deoptimized)
3283   //   - deallocate the "unpack_frame"
3284   //   - deallocate the deoptimization frame
3285   //   - in a loop using the information returned in the previous step
3286   //     push interpreter frames;
3287   //   - create a dummy "unpack_frame"


   1 /*
   2  * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


 110 
 111   static void restore_result_registers(MacroAssembler* masm);
 112 };
 113 
 114 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
 115   // Record volatile registers as callee-save values in an OopMap so their save locations will be
 116   // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
 117   // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
 118   // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
 119   // (as the stub's I's) when the runtime routine called by the stub creates its frame.
 120   int i;
 121   // Always make the frame size 16 byte aligned.
 122   int frame_size = round_to(additional_frame_words + register_save_size, 16);
 123   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
 124   int frame_size_in_slots = frame_size / sizeof(jint);
 125   // CodeBlob frame size is in words.
 126   *total_frame_words = frame_size / wordSize;
 127   // OopMap* map = new OopMap(*total_frame_words, 0);
 128   OopMap* map = new OopMap(frame_size_in_slots, 0);
 129
 130   __ save(SP, -frame_size, SP);
 131
 132 

 133   int debug_offset = 0;



 134   // Save the G's
 135   __ stx(G1, SP, g1_offset+STACK_BIAS);
 136   map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());
 137 
 138   __ stx(G3, SP, g3_offset+STACK_BIAS);
 139   map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());
 140 
 141   __ stx(G4, SP, g4_offset+STACK_BIAS);
 142   map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());
 143 
 144   __ stx(G5, SP, g5_offset+STACK_BIAS);
 145   map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
 146 
 147   // This is really a waste but we'll keep things as they were for now
 148   if (true) {
 149   }
 150 
 151 
 152   // Save the flags
 153   __ rdccr( G5 );
 154   __ stx(G5, SP, ccr_offset+STACK_BIAS);
 155   __ stxfsr(SP, fsr_offset+STACK_BIAS);
 156 
 157   // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
 158   int offset = d00_offset;
 159   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
 160     FloatRegister f = as_FloatRegister(i);
 161     __ stf(FloatRegisterImpl::D,  f, SP, offset+STACK_BIAS);
 162     // Record as callee saved both halves of double registers (2 float registers).
 163     map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
 164     map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
 165     offset += sizeof(double);
 166   }
 167 
 168   // And we're done.


 175 // saved.
 176 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
 177 
 178   // Restore all the FP registers
 179   for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
 180     __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
 181   }
 182 
 183   __ ldx(SP, ccr_offset+STACK_BIAS, G1);
 184   __ wrccr (G1) ;
 185 
 186   // Restore the G's
 187   // Note that G2 (AKA GThread) must be saved and restored separately.
 188   // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.
 189 
 190   __ ldx(SP, g1_offset+STACK_BIAS, G1);
 191   __ ldx(SP, g3_offset+STACK_BIAS, G3);
 192   __ ldx(SP, g4_offset+STACK_BIAS, G4);
 193   __ ldx(SP, g5_offset+STACK_BIAS, G5);
 194
 195   // Restore flags
 196 
 197   __ ldxfsr(SP, fsr_offset+STACK_BIAS);
 198 
 199   __ restore();
 200
 201 }
 202 
 203 // Pop the current frame and restore the registers that might be holding
 204 // a result.
 205 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 206
 207   __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));
 208 
 209   __ restore();
 210
 211 }
 212 
 213 // Is vector's size (in bytes) bigger than a size saved by default?
 214 // 8 bytes FP registers are saved by default on SPARC.
 215 bool SharedRuntime::is_wide_vector(int size) {
 216   // Note, MaxVectorSize == 8 on SPARC.
 217   assert(size <= 8, "%d bytes vectors are not supported", size);
 218   return size > 8;
 219 }
 220 
 221 size_t SharedRuntime::trampoline_size() {
 222   return 40;
 223 }
 224 
 225 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 226   __ set((intptr_t)destination, G3_scratch);
 227   __ JMP(G3_scratch, 0);
 228   __ delayed()->nop();
 229 }
 230 


 287 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 288                                            VMRegPair *regs,
 289                                            int total_args_passed,
 290                                            int is_outgoing) {
 291   assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
 292 
 293   const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
 294   const int flt_reg_max = 8;
 295 
 296   int int_reg = 0;
 297   int flt_reg = 0;
 298   int slot = 0;
 299 
 300   for (int i = 0; i < total_args_passed; i++) {
 301     switch (sig_bt[i]) {
 302     case T_INT:
 303     case T_SHORT:
 304     case T_CHAR:
 305     case T_BYTE:
 306     case T_BOOLEAN:
 307       if (int_reg < int_reg_max) {
 308         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
 309         regs[i].set1(r->as_VMReg());
 310       } else {
 311         regs[i].set1(VMRegImpl::stack2reg(slot++));
 312       }
 313       break;
 314 

 315     case T_LONG:
 316       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting VOID in other half");
 317       // fall-through
 318     case T_OBJECT:
 319     case T_ARRAY:
 320     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
 321       if (int_reg < int_reg_max) {
 322         Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
 323         regs[i].set2(r->as_VMReg());
 324       } else {
 325         slot = round_to(slot, 2);  // align
 326         regs[i].set2(VMRegImpl::stack2reg(slot));
 327         slot += 2;
 328       }
 329       break;
 330       break;
 331 
 332     case T_FLOAT:
 333       if (flt_reg < flt_reg_max) {
 334         FloatRegister r = as_FloatRegister(flt_reg++);
 335         regs[i].set1(r->as_VMReg());
 336       } else {
 337         regs[i].set1(VMRegImpl::stack2reg(slot++));
 338       }
 339       break;
 340 
 341     case T_DOUBLE:
 342       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 343       if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
 344         flt_reg = round_to(flt_reg, 2);  // align
 345         FloatRegister r = as_FloatRegister(flt_reg);
 346         regs[i].set2(r->as_VMReg());
 347         flt_reg += 2;
 348       } else {
 349         slot = round_to(slot, 2);  // align


 416 
 417 
 418 // Patch the caller's callsite with the entry to compiled code, if it exists.
 419 void AdapterGenerator::patch_callers_callsite() {
 420   Label L;
 421   __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
 422   __ br_null(G3_scratch, false, Assembler::pt, L);
 423   __ delayed()->nop();
 424   // Call into the VM to patch the caller, then jump to compiled callee
 425   __ save_frame(4);     // Args in compiled layout; do not blow them
 426 
 427   // Must save all the live Gregs; the list is:
 428   // G1: 1st Long arg (32bit build)
 429   // G2: global allocated to TLS
 430   // G3: used in inline cache check (scratch)
 431   // G4: 2nd Long arg (32bit build);
 432   // G5: used in inline cache check (Method*)
 433 
 434   // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.
 435 

 436   // mov(s,d)
 437   __ mov(G1, L1);
 438   __ mov(G4, L4);
 439   __ mov(G5_method, L5);
 440   __ mov(G5_method, O0);         // VM needs target method
 441   __ mov(I7, O1);                // VM needs caller's callsite
 442   // Must be a leaf call...
 443   // can be very far once the blob has been relocated
 444   AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 445   __ relocate(relocInfo::runtime_call_type);
 446   __ jumpl_to(dest, O7, O7);
 447   __ delayed()->mov(G2_thread, L7_thread_cache);
 448   __ mov(L7_thread_cache, G2_thread);
 449   __ mov(L1, G1);
 450   __ mov(L4, G4);
 451   __ mov(L5, G5_method);














 452 
 453   __ restore();      // Restore args
 454   __ bind(L);
 455 }
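// The save/call/restore shape of the sequence above, sketched at the C level
// (hypothetical names; the real code parks G1/G4/G5 in locals of a scratch
// register window and calls SharedRuntime::fixup_callers_callsite as a leaf):
struct live_globals_sketch { long g1; long g4; void* g5_method; };
static void patch_callsite_sketch(live_globals_sketch& regs,
                                  void (*fixup)(void* callee_method, void* caller_pc),
                                  void* callee_method, void* caller_pc) {
  live_globals_sketch saved = regs;    // stash the values the runtime call may clobber
  fixup(callee_method, caller_pc);     // patch the caller's callsite in the VM
  regs = saved;                        // restore the long args and the Method* afterwards
}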
 456 
 457 
 458 RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
 459   RegisterOrConstant roc(arg_offset(st_off));
 460   return __ ensure_simm13_or_reg(roc, Rdisp);
 461 }
 462 
 463 RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
 464   RegisterOrConstant roc(next_arg_offset(st_off));
 465   return __ ensure_simm13_or_reg(roc, Rdisp);
 466 }
 467 
 468 
 469 // Stores long into offset pointed to by base
 470 void AdapterGenerator::store_c2i_long(Register r, Register base,
 471                                       const int st_off, bool is_stack) {

 472   // In V9, longs are given 2 64-bit slots in the interpreter, but the
 473   // data is passed in only 1 slot.
 474   __ stx(r, base, next_arg_slot(st_off));


















 475 }
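// A minimal standalone sketch of the comment above (hypothetical helper, not the
// AdapterGenerator API): the long owns two interpreter stack elements, but a
// single 64-bit store into the slot chosen by next_arg_slot() covers the value.
static void store_long_payload_sketch(char* base, long payload_slot_offset, long long value) {
  *reinterpret_cast<long long*>(base + payload_slot_offset) = value;  // one stx-style store
}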
 476 
 477 void AdapterGenerator::store_c2i_object(Register r, Register base,
 478                       const int st_off) {
 479   __ st_ptr (r, base, arg_slot(st_off));
 480 }
 481 
 482 void AdapterGenerator::store_c2i_int(Register r, Register base,
 483                    const int st_off) {
 484   __ st (r, base, arg_slot(st_off));
 485 }
 486 
 487 // Stores into offset pointed to by base
 488 void AdapterGenerator::store_c2i_double(VMReg r_2,
 489                       VMReg r_1, Register base, const int st_off) {

 490   // In V9, doubles are given 2 64-bit slots in the interpreter, but the
 491   // data is passed in only 1 slot.
 492   __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));





 493 }
 494 
 495 void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
 496                                        const int st_off) {
 497   __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
 498 }
 499 
 500 void AdapterGenerator::gen_c2i_adapter(
 501                             int total_args_passed,
 502                             // VMReg max_arg,
 503                             int comp_args_on_stack, // VMRegStackSlots
 504                             const BasicType *sig_bt,
 505                             const VMRegPair *regs,
 506                             Label& L_skip_fixup) {
 507 
 508   // Before we get into the guts of the C2I adapter, see if we should be here
 509   // at all.  We've come from compiled code and are attempting to jump to the
 510   // interpreter, which means the caller made a static call to get here
 511   // (vcalls always get a compiled target if there is one).  Check for a
 512   // compiled target.  If there is one, we need to patch the caller's call.


 779 
 780     // Load in argument order going down.
 781     const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
 782     set_Rdisp(G1_scratch);
 783 
 784     VMReg r_1 = regs[i].first();
 785     VMReg r_2 = regs[i].second();
 786     if (!r_1->is_valid()) {
 787       assert(!r_2->is_valid(), "");
 788       continue;
 789     }
 790     if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
 791       r_1 = F8->as_VMReg();        // as part of the load/store shuffle
 792       if (r_2->is_valid()) r_2 = r_1->next();
 793     }
 794     if (r_1->is_Register()) {  // Register argument
 795       Register r = r_1->as_Register()->after_restore();
 796       if (!r_2->is_valid()) {
 797         __ ld(Gargs, arg_slot(ld_off), r);
 798       } else {

 799         // In V9, longs are given 2 64-bit slots in the interpreter, but the
 800         // data is passed in only 1 slot.
 801         RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
 802               next_arg_slot(ld_off) : arg_slot(ld_off);
 803         __ ldx(Gargs, slot, r);



 804       }
 805     } else {
 806       assert(r_1->is_FloatRegister(), "");
 807       if (!r_2->is_valid()) {
 808         __ ldf(FloatRegisterImpl::S, Gargs,      arg_slot(ld_off), r_1->as_FloatRegister());
 809       } else {

 810         // In V9, doubles are given 2 64-bit slots in the interpreter, but the
 811         // data is passed in only 1 slot.  This code also handles longs that
 812         // are passed on the stack, but need a stack-to-stack move through a
 813         // spare float register.
 814         RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
 815               next_arg_slot(ld_off) : arg_slot(ld_off);
 816         __ ldf(FloatRegisterImpl::D, Gargs,                  slot, r_1->as_FloatRegister());





 817       }
 818     }
 819     // Was the argument really intended to be on the stack, but was loaded
 820     // into F8/F9?
 821     if (regs[i].first()->is_stack()) {
 822       assert(r_1->as_FloatRegister() == F8, "fix this code");
 823       // Convert stack slot to an SP offset
 824       int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
 825       // Store down the shuffled stack word.  Target address _is_ aligned.
 826       RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
 827       if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
 828       else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
 829     }
 830   }
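  // Restated (illustration only, not emitted code): a value whose real home is a
  // compiled-frame stack slot takes a detour through the spare F8/F9 pair --
  //   spare = *(Gargs + slot);              // ldf  F8 (or F8:F9 for 64-bit data)
  //   *(SP + st_off + STACK_BIAS) = spare;  // stf  down into the outgoing area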
 831 
 832   // Jump to the compiled code just as if compiled code was doing it.
 833   __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
 834 #if INCLUDE_JVMCI
 835   if (EnableJVMCI) {
 836     // check if this call should be routed towards a specific entry point


 969 
 970 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 971                                          VMRegPair *regs,
 972                                          VMRegPair *regs2,
 973                                          int total_args_passed) {
 974     assert(regs2 == NULL, "not needed on sparc");
 975 
 976     // Return the number of VMReg stack_slots needed for the args.
 977     // This value does not include an abi space (like register window
 978     // save area).
 979 
 980     // The native convention is V8 if !LP64.
 981     // The LP64 convention is the V9 convention, which is slightly more sane.
 982 
 983     // We return the number of VMReg stack slots we need to reserve for all
 984     // the arguments NOT counting out_preserve_stack_slots. Since we always
 985     // have space for storing at least 6 registers to memory, we start with that.
 986     // See int_stk_helper for further discussion.
 987     int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();
 988 

 989     // V9 convention: All things "as-if" on double-wide stack slots.
 990     // Hoist any int/ptr/long's in the first 6 to int regs.
 991     // Hoist any flt/dbl's in the first 16 dbl regs.
 992     int j = 0;                  // Count of actual args, not HALVES
 993     VMRegPair param_array_reg;  // location of the argument in the parameter array
 994     for (int i = 0; i < total_args_passed; i++, j++) {
 995       param_array_reg.set_bad();
 996       switch (sig_bt[i]) {
 997       case T_BOOLEAN:
 998       case T_BYTE:
 999       case T_CHAR:
1000       case T_INT:
1001       case T_SHORT:
1002         regs[i].set1(int_stk_helper(j));
1003         break;
1004       case T_LONG:
1005         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
1006       case T_ADDRESS: // raw pointers, like current thread, for VM calls
1007       case T_ARRAY:
1008       case T_OBJECT:


1052       case T_VOID:
1053         regs[i].set_bad();
1054         j--;
1055         break; // Do not count HALVES
1056       default:
1057         ShouldNotReachHere();
1058       }
1059       // Keep track of the deepest parameter array slot.
1060       if (!param_array_reg.first()->is_valid()) {
1061         param_array_reg = regs[i];
1062       }
1063       if (param_array_reg.first()->is_stack()) {
1064         int off = param_array_reg.first()->reg2stack();
1065         if (off > max_stack_slots) max_stack_slots = off;
1066       }
1067       if (param_array_reg.second()->is_stack()) {
1068         int off = param_array_reg.second()->reg2stack();
1069         if (off > max_stack_slots) max_stack_slots = off;
1070       }
1071     }






































1072   return round_to(max_stack_slots + 1, 2);
1073 
1074 }
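// To make the final rounding concrete (illustration only; round_up_sketch is a
// hypothetical stand-in for the VM's round_to):
static int round_up_sketch(int x, int unit) { return ((x + unit - 1) / unit) * unit; }
//   deepest slot 13  ->  round_up_sketch(13 + 1, 2) == 14 VMReg slots reserved
//   deepest slot 14  ->  round_up_sketch(14 + 1, 2) == 16 VMReg slots reserved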
1075 
1076 
1077 // ---------------------------------------------------------------------------
1078 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1079   switch (ret_type) {
1080   case T_FLOAT:
1081     __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
1082     break;
1083   case T_DOUBLE:
1084     __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
1085     break;
1086   }
1087 }
1088 
1089 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1090   switch (ret_type) {
1091   case T_FLOAT:


1179 }
1180 
1181 
1182 // An oop arg. Must pass a handle, not the oop itself.
1183 static void object_move(MacroAssembler* masm,
1184                         OopMap* map,
1185                         int oop_handle_offset,
1186                         int framesize_in_slots,
1187                         VMRegPair src,
1188                         VMRegPair dst,
1189                         bool is_receiver,
1190                         int* receiver_offset) {
1191 
1192   // must pass a handle. First figure out the location we use as a handle
1193 
1194   if (src.first()->is_stack()) {
1195     // Oop is already on the stack
1196     Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1197     __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1198     __ ld_ptr(rHandle, 0, L4);

1199     __ movr(Assembler::rc_z, L4, G0, rHandle);




1200     if (dst.first()->is_stack()) {
1201       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1202     }
1203     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1204     if (is_receiver) {
1205       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1206     }
1207     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1208   } else {
1209     // Oop is in an input register; we must flush it to the stack.
1210     const Register rOop = src.first()->as_Register();
1211     const Register rHandle = L5;
1212     int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1213     int offset = oop_slot * VMRegImpl::stack_slot_size;
1214     __ st_ptr(rOop, SP, offset + STACK_BIAS);
1215     if (is_receiver) {
1216        *receiver_offset = offset;
1217     }
1218     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1219     __ add(SP, offset + STACK_BIAS, rHandle);

1220     __ movr(Assembler::rc_z, rOop, G0, rHandle);




1221 
1222     if (dst.first()->is_stack()) {
1223       __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1224     } else {
1225       __ mov(rHandle, dst.first()->as_Register());
1226     }
1227   }
1228 }
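// What the conditional moves above amount to, at the C level (illustration only):
// JNI receives the address of the spilled oop, or NULL when the oop itself is
// NULL, which is exactly what movr on a zero source register produces.
static void** handlize_sketch(void** slot_holding_oop) {
  return (*slot_holding_oop == 0) ? (void**)0 : slot_holding_oop;
}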
1229 
1230 // A float arg may have to do a float-reg to int-reg conversion.
1231 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1232   assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1233 
1234   if (src.first()->is_stack()) {
1235     if (dst.first()->is_stack()) {
1236       // stack to stack: the easiest of the bunch
1237       __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1238       __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1239     } else {
1240       // stack to reg


1831       __ delayed()->mov(obj_reg, hash);
1832     }
1833 
1834     // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
1835     // We depend on hash_mask being at most 32 bits and avoid the use of
1836     // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
1837     // vm: see markOop.hpp.
1838     __ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
1839     __ sethi(markOopDesc::hash_mask, mask);
1840     __ btst(markOopDesc::unlocked_value, header);
1841     __ br(Assembler::zero, false, Assembler::pn, slowCase);
1842     if (UseBiasedLocking) {
1843       // Check if biased and fall through to runtime if so
1844       __ delayed()->nop();
1845       __ btst(markOopDesc::biased_lock_bit_in_place, header);
1846       __ br(Assembler::notZero, false, Assembler::pn, slowCase);
1847     }
1848     __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);
1849 
1850     // Check for a valid (non-zero) hash code and get its value.

1851     __ srlx(header, markOopDesc::hash_shift, hash);



1852     __ andcc(hash, mask, hash);
1853     __ br(Assembler::equal, false, Assembler::pn, slowCase);
1854     __ delayed()->nop();
1855 
1856     // leaf return.
1857     __ bind(done);
1858     __ retl();
1859     __ delayed()->mov(hash, result);
1860     __ bind(slowCase);
1861   }
1862 #endif // COMPILER1
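  // The fast path above, restated as pseudocode (illustration only; the names
  // stand in for markOopDesc::hash_shift / hash_mask and the lock bits):
  //   mark = obj->mark();
  //   if (!unlocked(mark) || biased(mark))  goto slowCase;
  //   hash = (mark >> hash_shift) & hash_mask;
  //   if (hash == 0)                        goto slowCase;   // no identity hash installed yet
  //   return hash;                                           // leaf return in the result register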
1863 
1864 
1865   // We have received a description of where all the Java args are located
1866   // on entry to the wrapper. We need to convert these args to where
1867   // the JNI function will expect them. To figure out where they go
1868   // we convert the Java signature to a C signature by inserting
1869   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1870 
1871   const int total_in_args = method->size_of_parameters();


2167   if (method->is_static() && !is_critical_native) {
2168     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1);
2169 
2170     // Now handlize the static class mirror in O1.  It's known not-null.
2171     __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
2172     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2173     __ add(SP, klass_offset + STACK_BIAS, O1);
2174   }
2175 
2176 
2177   const Register L6_handle = L6;
2178 
2179   if (method->is_synchronized()) {
2180     assert(!is_critical_native, "unhandled");
2181     __ mov(O1, L6_handle);
2182   }
2183 
2184   // We have all of the arguments set up at this point. We MUST NOT touch any Oregs
2185   // except O6/O7. So if we must call out we must push a new frame. We immediately
2186   // push a new frame and flush the windows.

2187   intptr_t thepc = (intptr_t) __ pc();
2188   {
2189     address here = __ pc();
2190     // Call the next instruction
2191     __ call(here + 8, relocInfo::none);
2192     __ delayed()->nop();
2193   }



2194 
2195   // We use the same pc/oopMap repeatedly when we call out
2196   oop_maps->add_gc_map(thepc - start, map);
2197 
2198   // O7 now has the pc loaded that we will use when we finally call to native.
2199 
2200   // Save thread in L7; it crosses a bunch of VM calls below
2201   // Don't use save_thread because it smashes G2 and we merely
2202   // want to save a copy
2203   __ mov(G2_thread, L7_thread_cache);
2204 
2205 
2206   // If we create an inner frame, once is plenty;
2207   // when we create it we must also save G2_thread.
2208   bool inner_frame_created = false;
2209 
2210   // dtrace method entry support
2211   {
2212     SkipIfEqual skip_if(
2213       masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);


2308     // either as the flush traps and the current window goes too.
2309     __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2310     __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2311   }
2312 
2313   // get JNIEnv*, which is the first argument to the native method
2314   if (!is_critical_native) {
2315     __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2316   }
2317 
2318   // Use that pc we placed in O7 a while back as the current frame anchor
2319   __ set_last_Java_frame(SP, O7);
2320 
2321   // We flushed the windows ages ago; now mark them as flushed before transitioning.
2322   __ set(JavaFrameAnchor::flushed, G3_scratch);
2323   __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2324 
2325   // Transition from _thread_in_Java to _thread_in_native.
2326   __ set(_thread_in_native, G3_scratch);
2327 

2328   AddressLiteral dest(native_func);
2329   __ relocate(relocInfo::runtime_call_type);
2330   __ jumpl_to(dest, O7, O7);



2331   __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2332 
2333   __ restore_thread(L7_thread_cache); // restore G2_thread
2334 
2335   // Unpack native results.  For int-types, we do any needed sign-extension
2336   // and move things into I0.  The return value there will survive any VM
2337   // calls for blocking or unlocking.  An FP or OOP result (handle) is done
2338   // specially in the slow-path code.
2339   switch (ret_type) {
2340   case T_VOID:    break;        // Nothing to do!
2341   case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
2342   case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
2343   // In the 64-bit build the result is in O0; in the 32-bit build it is in O0, O1.
2344   case T_LONG:



2345                   // Fall thru
2346   case T_OBJECT:                // Really a handle
2347   case T_ARRAY:
2348   case T_INT:
2349                   __ mov(O0, I0);
2350                   break;
2351   case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2352   case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
2353   case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
2354   case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
2355     break;                      // Cannot de-handlize until after reclaiming jvm_lock
2356   default:
2357     ShouldNotReachHere();
2358   }
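  // C-level equivalents of the narrowings above (illustration only, not emitted code):
  //   T_BOOLEAN:  I0 = (O0 != 0);            // subcc/addc turns any non-zero value into 1
  //   T_BYTE:     I0 = (int8_t)  O0;         // sll 24, sra 24: sign-extend the low byte
  //   T_CHAR:     I0 = (uint16_t)O0;         // sll 16, srl 16: zero-extend the low 16 bits
  //   T_SHORT:    I0 = (int16_t) O0;         // sll 16, sra 16: sign-extend the low 16 bits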
2359 
2360   Label after_transition;
2361   // must we block?
2362 
2363   // Block, if necessary, before resuming in _thread_in_Java state.
2364   // In order for GC to work, don't clear the last_Java_sp until after blocking.


2530       __ verify_oop(I0);
2531   }
2532 
2533   if (CheckJNICalls) {
2534     // clear_pending_jni_exception_check
2535     __ st_ptr(G0, G2_thread, JavaThread::pending_jni_exception_check_fn_offset());
2536   }
2537 
2538   if (!is_critical_native) {
2539     // reset handle block
2540     __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
2541     __ st(G0, L5, JNIHandleBlock::top_offset_in_bytes());
2542 
2543     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
2544     check_forward_pending_exception(masm, G3_scratch);
2545   }
2546 
2547 
2548   // Return
2549 










2550   __ ret();
2551   __ delayed()->restore();
2552 
2553   __ flush();
2554 
2555   nmethod *nm = nmethod::new_native_nmethod(method,
2556                                             compile_id,
2557                                             masm->code(),
2558                                             vep_offset,
2559                                             frame_complete,
2560                                             stack_slots / VMRegImpl::slots_per_word,
2561                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2562                                             in_ByteSize(lock_offset),
2563                                             oop_maps);
2564 
2565   if (is_critical_native) {
2566     nm->set_lazy_critical_native(true);
2567   }
2568   return nm;
2569 


2591 
2592 static void gen_new_frame(MacroAssembler* masm, bool deopt) {
2593 //
2594 // Common out the new frame generation for deopt and uncommon trap
2595 //
2596   Register        G3pcs              = G3_scratch; // Array of new pcs (input)
2597   Register        Oreturn0           = O0;
2598   Register        Oreturn1           = O1;
2599   Register        O2UnrollBlock      = O2;
2600   Register        O3array            = O3;         // Array of frame sizes (input)
2601   Register        O4array_size       = O4;         // number of frames (input)
2602   Register        O7frame_size       = O7;         // size of the frame being pushed (scratch)
2603 
2604   __ ld_ptr(O3array, 0, O7frame_size);
2605   __ sub(G0, O7frame_size, O7frame_size);
2606   __ save(SP, O7frame_size, SP);
2607   __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc
2608 
2609   #ifdef ASSERT
2610   // make sure that the frames are aligned properly




2611   #endif
2612 
2613   // Deopt needs to pass some extra live values from frame to frame
2614 
2615   if (deopt) {
2616     __ mov(Oreturn0->after_save(), Oreturn0);
2617     __ mov(Oreturn1->after_save(), Oreturn1);
2618   }
2619 
2620   __ mov(O4array_size->after_save(), O4array_size);
2621   __ sub(O4array_size, 1, O4array_size);
2622   __ mov(O3array->after_save(), O3array);
2623   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
2624   __ add(G3pcs, wordSize, G3pcs);               // point to next pc value
2625 
2626   #ifdef ASSERT
2627   // trash registers to show a clear pattern in backtraces
2628   __ set(0xDEAD0000, I0);
2629   __ add(I0,  2, I1);
2630   __ add(I0,  4, I2);


2708 }
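// The per-frame bookkeeping visible in gen_new_frame above, restated as
// pseudocode (illustration only, not the generated SPARC code):
//   frame_size    = *O3array;        // size of the interpreter frame to push
//   save(SP, -frame_size, SP);       // push a new register window of that size
//   I7            = *G3pcs;          // the pc this synthetic frame will return to
//   O4array_size -= 1;               // one fewer frame left to build
//   G3pcs        += wordSize;        // advance to the next pc value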
2709 
2710 //------------------------------generate_deopt_blob----------------------------
2711 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2712 // instead.
2713 void SharedRuntime::generate_deopt_blob() {
2714   // allocate space for the code
2715   ResourceMark rm;
2716   // setup code generation tools
2717   int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
2718 #ifdef ASSERT
2719   if (UseStackBanging) {
2720     pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2721   }
2722 #endif
2723 #if INCLUDE_JVMCI
2724   if (EnableJVMCI) {
2725     pad += 1000; // Increase the buffer size when compiling for JVMCI
2726   }
2727 #endif

2728   CodeBuffer buffer("deopt_blob", 2100+pad, 512);





2729   MacroAssembler* masm               = new MacroAssembler(&buffer);
2730   FloatRegister   Freturn0           = F0;
2731   Register        Greturn1           = G1;
2732   Register        Oreturn0           = O0;
2733   Register        Oreturn1           = O1;
2734   Register        O2UnrollBlock      = O2;
2735   Register        L0deopt_mode       = L0;
2736   Register        G4deopt_mode       = G4_scratch;
2737   int             frame_size_words;
2738   Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);



2739   Label           cont;
2740 
2741   OopMapSet *oop_maps = new OopMapSet();
2742 
2743   //
2744   // This is the entry point for code which is returning to a de-optimized
2745   // frame.
2746   // The steps taken by this frame are as follows:
2747   //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
2748   //     and all potentially live registers (at a pollpoint many registers can be live).
2749   //
2750   //   - call the C routine: Deoptimization::fetch_unroll_info (this function
2751   //     returns information about the number and size of interpreter frames
2752   //     which are equivalent to the frame which is being deoptimized)
2753   //   - deallocate the unpack frame, restoring only result values. Other
2754   //     volatile registers will now be captured in the vframeArray as needed.
2755   //   - deallocate the deoptimization frame
2756   //   - in a loop using the information returned in the previous step
2757   //     push new interpreter frames (take care to propagate the return
2758   //     values through each new frame pushed)


2930   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
2931   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
2932   __ bind(noException);
2933 
2934   // deallocate the deoptimization frame taking care to preserve the return values
2935   __ mov(Oreturn0,     Oreturn0->after_save());
2936   __ mov(Oreturn1,     Oreturn1->after_save());
2937   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
2938   __ restore();
2939 
2940   // Allocate new interpreter frame(s) and possible c2i adapter frame
2941 
2942   make_new_frames(masm, true);
2943 
2944   // push a dummy "unpack_frame" taking care of float return values and
2945   // call Deoptimization::unpack_frames to have the unpacker layout
2946   // information in the interpreter frames just created and then return
2947   // to the interpreter entry point
2948   __ save(SP, -frame_size_words*wordSize, SP);
2949   __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);








2950   // LP64 uses g4 in set_last_Java_frame
2951   __ mov(G4deopt_mode, O1);
2952   __ set_last_Java_frame(SP, G0);
2953   __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);

2954   __ reset_last_Java_frame();
2955   __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);
2956 








2957   __ ret();
2958   __ delayed()->restore();
2959 
2960   masm->flush();
2961   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
2962   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2963 #if INCLUDE_JVMCI
2964   if (EnableJVMCI) {
2965     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2966     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2967   }
2968 #endif
2969 }
2970 
2971 #ifdef COMPILER2
2972 
2973 //------------------------------generate_uncommon_trap_blob--------------------
2974 // Ought to generate an ideal graph & compile, but here's some SPARC ASM
2975 // instead.
2976 void SharedRuntime::generate_uncommon_trap_blob() {
2977   // allocate space for the code
2978   ResourceMark rm;
2979   // setup code generation tools
2980   int pad = VerifyThread ? 512 : 0;
2981 #ifdef ASSERT
2982   if (UseStackBanging) {
2983     pad += (JavaThread::stack_shadow_zone_size() / os::vm_page_size())*16 + 32;
2984   }
2985 #endif

2986   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);





2987   MacroAssembler* masm               = new MacroAssembler(&buffer);
2988   Register        O2UnrollBlock      = O2;
2989   Register        O2klass_index      = O2;
2990 
2991   //
2992   // This is the entry point for all traps the compiler takes when it thinks
2993   // it cannot handle further execution of compiled code. The frame is
2994   // deoptimized in these cases and converted into interpreter frames for
2995   // execution.
2996   // The steps taken by this frame are as follows:
2997   //   - push a fake "unpack_frame"
2998   //   - call the C routine Deoptimization::uncommon_trap (this function
2999   //     packs the current compiled frame into vframe arrays and returns
3000   //     information about the number and size of interpreter frames which
3001   //     are equivalent to the frame which is being deoptimized)
3002   //   - deallocate the "unpack_frame"
3003   //   - deallocate the deoptimization frame
3004   //   - in a loop using the information returned in the previous step
3005   //     push interpreter frames;
3006   //   - create a dummy "unpack_frame"

