/*
 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_aarch32.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "oops/compiledICHolder.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/formatBuffer.hpp"
#include "vmreg_aarch32.inline.hpp"
#include "register_aarch32.hpp"
#include "vm_version_aarch32.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#if COMPILER2_OR_JVMCI
#include "adfiles/ad_aarch32.hpp"
#include "opto/runtime.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif


#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class SimpleRuntimeFrame {

  public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    // we don't expect any arg reg save area so aarch32 asserts that
    // frame::arg_reg_save_area_bytes == 0
    rbp_off = 0,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

// FIXME -- this is used by C1
class RegisterSaver {
 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool lr_pushed = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);

  // Capture info about frame layout
  enum layout {
      fpu_state_off = 0,
      fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
      // The frame sender code expects that rfp will be in
      // the "natural" place and will override any oopMap
      // setting for it. We must therefore force the layout
      // so that it agrees with the frame sender code.
      //
      // FIXME there are extra saved registers (from `push_CPU_state`); note that r11 == rfp
      r0_off,
      r1_off,
      r2_off,
      r3_off,
      r4_off,
      r5_off,
      r6_off,
      r7_off,
      r8_off,  rmethod_off = r8_off,
      r9_off,  rscratch1_off = r9_off,
      r10_off,
      r11_off,
      r12_off,
      r14_off, // with C2 this may hold a value different from the LR entry in the frame
      reg_save_size,
  };


  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int offset_in_bytes(int offset)    { return offset * wordSize; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);

};
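
// Informal sketch (not authoritative) of the save area built by
// save_live_registers, derived from the enum above: the FPU state occupies the
// lowest offsets (fpu_state_off .. fpu_state_end, present only when hasFPU()),
// followed by r0..r12 and r14, with the ordinary frame header (saved rfp and
// return address) sitting above, closest to the caller's frame.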

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool lr_pushed) {
  int frame_size_in_bytes = additional_frame_words * wordSize + (reg_save_size + frame::get_frame_size()) * BytesPerInt;
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
  *total_frame_words = frame_size_in_bytes / wordSize;

  if (lr_pushed) {
    if (FrameAPCS)
      Unimplemented();

    __ push(rfp);
    __ add(rfp, sp, wordSize);
  } else
    __ enter();
  __ push_CPU_state();

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);

  oop_map->set_callee_saved(VMRegImpl::stack2reg(r0_off + additional_frame_slots), r0->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r1_off + additional_frame_slots), r1->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r2_off + additional_frame_slots), r2->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r3_off + additional_frame_slots), r3->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r4_off + additional_frame_slots), r4->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r5_off + additional_frame_slots), r5->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r6_off + additional_frame_slots), r6->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r7_off + additional_frame_slots), r7->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r8_off + additional_frame_slots), r8->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r9_off + additional_frame_slots), r9->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r10_off + additional_frame_slots), r10->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r12_off + additional_frame_slots), r12->as_VMReg());
  oop_map->set_callee_saved(VMRegImpl::stack2reg(r14_off + additional_frame_slots), r14->as_VMReg());
  if (hasFPU()) {
    for (int i = 0; i < FPUStateSizeInWords; ++i) {
      oop_map->set_callee_saved(VMRegImpl::stack2reg(fpu_state_off + i + additional_frame_slots),
              as_FloatRegister(i)->as_VMReg());
    }
  }

  return oop_map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
  __ pop_CPU_state();
  if (restore_lr)
    __ leave();
  else {
    if (FrameAPCS)
      Unimplemented();

    __ sub(sp, rfp, wordSize);
    __ pop(rfp);
  }
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.


  if (hasFPU()) {
    // Restore fp result register
    __ vldr_f64(d0, Address(sp, offset_in_bytes(fpu_state_off)));
  }

  // Restore integer result register
  __ ldr(r0, Address(sp, offset_in_bytes(r0_off)));
  __ ldr(r1, Address(sp, offset_in_bytes(r1_off)));

  // Pop all of the register save area off the stack
  __ add(sp, sp, (reg_save_size + frame::get_frame_size()) * wordSize);
}

// Is vector's size (in bytes) bigger than a size saved by default?
// Registers up to 16 bytes are saved by default.
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

size_t SharedRuntime::trampoline_size() {
  return NativeCall::instruction_size;
}

void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  __ mov(rscratch1, destination);
  __ b(rscratch1);
}

// This function returns the offset from fp to the java arguments on the stack.
//
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // After the stack frame is created, fp points to 1 slot after the previous sp value.
  return (r->reg2stack() + 1) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
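
// Illustrative example (assumes VMRegImpl::stack_slot_size == 4 and
// out_preserve_stack_slots() == 0, which may differ in a given build): an
// incoming argument in caller stack slot 2 is read from
// Address(rfp, reg2offset_in) == rfp + (2 + 1) * 4 == rfp + 12, while an
// outgoing value destined for stack slot 2 is written to
// Address(sp, reg2offset_out) == sp + 8.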

template <class T> static const T& min(const T& a, const T& b) {
  return (a > b) ? b : a;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
// up to RegisterImpl::number_of_registers are the 32-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words,
// which are 32-bit.  The OUTPUTS are in 32-bit units.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {

  assert(j_rarg0 == c_rarg0, "assumed");

#ifndef HARD_FLOAT_CC
  if (hasFPU()) {
    // Create the mapping between argument positions and
    // registers.
    static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
      j_rarg0, j_rarg1, j_rarg2, j_rarg3
    };
    const uint FP_ArgReg_N = 16;
    static const FloatRegister FP_ArgReg[] = {
      f0, f1, f2, f3,
      f4, f5, f6, f7,
      f8, f9, f10, f11,
      f12, f13, f14, f15,
    };

    uint int_args = 0;
    uint fp_args = 0;
    uint stk_args = 0;

    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_FLOAT:
        if (fp_args < FP_ArgReg_N) {
          regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 1;
        }
        break;
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (int_args < Argument::n_int_register_parameters_j) {
          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 1;
        }
        break;
      case T_VOID:
        // halves of T_LONG or T_DOUBLE
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      case T_DOUBLE:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        fp_args = align_up(fp_args, 2);
        if (fp_args < FP_ArgReg_N) {
          regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
          fp_args += 2;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_LONG:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        if (int_args + 1 < Argument::n_int_register_parameters_j) {
          if ((int_args % 2) != 0) {
            ++int_args;
          }
          regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
          int_args += 2;
        } else {
          if (stk_args % 2 != 0) {
            ++stk_args;
          }
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
          int_args = Argument::n_int_register_parameters_j;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }

    return align_up(stk_args, StackAlignmentInBytes/wordSize);
  } else
#endif // ndef HARD_FLOAT_CC
  {
    // In aarch32 pure soft-float mode the Java calling convention is set to be the same as the C one.
    return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
  }
}
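
// Worked example (illustrative only, for the hasFPU() path above): for a Java
// signature (int, long, float, double) the incoming sig_bt is
// {T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID} and the loop assigns
//   int    -> j_rarg0
//   long   -> j_rarg2:j_rarg3   (int_args is bumped to an even index first)
//   float  -> f0
//   double -> f2:f3             (fp_args is aligned up to an even index first)
// with no stack slots used, so the function returns 0.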

// Patch the caller's callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
  __ cbz(rscratch1, L);

  __ enter();
  __ push_CPU_state();

  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

#ifndef PRODUCT
  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif

  __ mov(c_rarg0, rmethod);
  __ mov(c_rarg1, lr);
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
  __ bl(rscratch1);
  __ maybe_isb();

  __ pop_CPU_state();
  // restore sp
  __ leave();
  __ bind(L);
}

static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.

  const int extraspace = total_args_passed * Interpreter::stackElementSize;
  const Register compArgPos = lr;
  int ld_shift = 0;

  __ str(compArgPos, Address(sp, -(extraspace + wordSize)));
  __ mov(compArgPos, sp);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {

    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // next stack slot offset
    const int next_off = -Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_2->is_valid()) {
      assert(i + 1 < total_args_passed && sig_bt[i + 1] == T_VOID, "going to overwrite reg_2 value");
    }

    if (r_1->is_stack()) {
      // memory to memory use rscratch1
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size - ld_shift;
      if (!r_2->is_valid()) {
        __ ldr(rscratch1, Address(compArgPos, ld_off));
        __ str(rscratch1, Address(sp, next_off, Address::pre));
      } else {
        int tmp_off = ld_off;
        // ldrd accepts only imm8
        if (abs(ld_off) > (255 << 2)) {
          if (__ is_valid_for_imm12(ld_off)) {
            __ add(compArgPos, compArgPos, ld_off);
          } else {
            // add only takes an encoded imm12, so materialize the value in a register first
            __ mov(rscratch1, ld_off);
            __ add(compArgPos, compArgPos, rscratch1);
          }
          tmp_off = 0;
          ld_shift += ld_off;
        }
        __ ldrd(rscratch1, rscratch2, Address(compArgPos, tmp_off));
        __ strd(rscratch1, rscratch2, Address(sp, 2 * next_off, Address::pre));
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      assert(r != compArgPos, "compArgPos was modified");
      if (!r_2->is_valid()) {
        __ str(r, Address(sp, next_off, Address::pre));
      } else {
        assert(r_2->as_Register() != compArgPos, "compArgPos was modified");
        __ strd(r, r_2->as_Register(), Address(sp, 2 * next_off, Address::pre));
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (!r_2->is_valid()) {
        // Can't do pre or post addressing for vldr, vstr
        __ add(sp, sp, next_off);
        __ vstr_f32(r_1->as_FloatRegister(), Address(sp));
      } else {
        // TODO assert(r_2->is_FloatRegister() && r_2->as_FloatRegister() == r_1->as_FloatRegister() + 1, "");
        // Can't do pre or post addressing for vldr, vstr
        __ add(sp, sp, 2 * next_off);
        __ vstr_f64(r_1->as_FloatRegister(), Address(sp));
      }
    }
  }

  // At this point sp should be exactly extraspace below its original value,
  // so the saved compArgPos (lr) sits at sp - wordSize.
  __ ldr(compArgPos, Address(sp, -wordSize));

  // set sender sp
  if (__ is_valid_for_imm12(extraspace)) {
    __ add(r4, sp, extraspace);
  } else {
    __ mov(rscratch1, extraspace);
    __ add(r4, sp, rscratch1);
  }

  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
  __ b(rscratch1);
}

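// range_check: emits code that branches to L_ok when pc_reg lies within
// [code_start, code_end); otherwise control falls through past the generated
// code (via the internal L_fail label), so the caller can chain further range
// checks or emit an error path.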
static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ lea(temp_reg, ExternalAddress(code_start));
  __ cmp(pc_reg, temp_reg);
  __ b(L_fail, Assembler::LO);
  __ lea(temp_reg, ExternalAddress(code_end));
  __ cmp(pc_reg, temp_reg);
  __ b(L_ok, Assembler::LO);
  __ bind(L_fail);
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Note: on entry sp points at the interpreter's outgoing arguments (the
  // senderSP area). We must take care not to clobber the caller's state since
  // we may do an i2c -> c2i transition if we lose a race where compiled code
  // goes non-entrant while we get args ready.

  // In addition we use the incoming sp to locate all the interpreter args,
  // because sp itself is re-aligned to StackAlignmentInBytes below.

  // Adapters are frameless.

  // An i2c adapter is frameless because the *caller* frame, which is
  // interpreted, routinely repairs its own sp (from
  // interpreter_frame_last_sp), even if a callee has modified the
  // stack pointer.  It also recalculates and aligns sp.

  // A c2i adapter is frameless because the *callee* frame, which is
  // interpreted, routinely repairs its caller's sp (from sender_sp,
  // which is set up via the senderSP register).

  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.

  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  if (VerifyAdapterCalls &&
      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    //  assert(Interpreter::contains($return_addr) ||
    //         StubRoutines::contains($return_addr),
    //         "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    Label L_ok;
    if (Interpreter::code() != NULL)
      range_check(masm, lr, rscratch1,
                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
                  L_ok);
    if (StubRoutines::code1() != NULL)
      range_check(masm, lr, rscratch1,
                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
                  L_ok);
    if (StubRoutines::code2() != NULL)
      range_check(masm, lr, rscratch1,
                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
                  L_ok);
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2c ");
  }

  const int stack_space = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes);
  const int ld_high = total_args_passed * Interpreter::stackElementSize;
  // Point to interpreter value (vs. tag)
  const int next_off = -Interpreter::stackElementSize; // offset from ld ptr
  const Register loadCounter = lr;

  // Align sp to StackAlignmentInBytes so the compiled frame always starts aligned.
  // This is required by the APCS, so all native code depends on it. The compiled
  // Java code is not required to follow this standard; however, doing so
  // simplifies the code because it allows compiled frames to have a fixed size.
  __ mov(rscratch2, sp);
  __ align_stack();
  if (total_args_passed) {
    // put below reserved stack space, imm12 should be enough
    __ str(loadCounter, Address(sp, -(stack_space + wordSize)));

    if (__ is_valid_for_imm12(ld_high)) {
      __ add(loadCounter, rscratch2, ld_high);
    } else {
      // add only takes an encoded imm12, so materialize the value in a register first
      __ mov(rscratch1, ld_high);
      __ add(loadCounter, rscratch2, rscratch1);
    }
  }

  if (comp_args_on_stack) {
    if (__ is_valid_for_imm12(stack_space)) {
      __ sub(sp, sp, stack_space);
    } else {
      // sub only takes an encoded imm12, so materialize the value in a register first
      __ mov(rscratch1, stack_space);
      __ sub(sp, sp, rscratch1);
    }
  }

  // +------+   -> r4
  // |   0  | \
  // |   1  |  \
  // |   2  |   ->  Load in argument order going down.
  // |   x  |  /
  // |   N  | /
  // +------+ -> initial sp
  // | pad  | maybe 1 word to align the stack to 8 bytes
  // |   M  | \
  // |   x  |  \
  // |   2  |    ->  Load in argument order going up.
  // |   1  |  /
  // |   0  | /
  // +------+ ->


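  // Note on the shuffle below: loadCounter walks the interpreter arguments
  // downwards using pre-decrement addressing, while the compiled arguments are
  // stored at offsets from the re-aligned sp. sp_offset records any temporary
  // sp adjustment made when a strd offset does not fit its imm8 field, so sp
  // can be restored afterwards.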
  int sp_offset = 0;

  // Now generate the shuffle code.
  for (int i = 0; i < total_args_passed; i++) {

    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }

    if (r_2->is_valid()) {
      assert(i + 1 < total_args_passed && sig_bt[i + 1] == T_VOID, "going to overwrite reg_2 value");
    }

    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset
      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size - sp_offset;

      if (!r_2->is_valid()) {
        __ ldr(rscratch2, Address(loadCounter, next_off, Address::pre));
        __ str(rscratch2, Address(sp, st_off));
      } else {
        int tmp_off = st_off;
        if (abs(st_off) > (255 << 2)) {
          // st_off doesn't fit the imm8 required by strd
          if (__ is_valid_for_imm12(st_off)) {
            __ add(sp, sp, st_off);
          } else {
            // add only takes an encoded imm12, so materialize the value in a register first
            __ mov(rscratch1, st_off);
            __ add(sp, sp, rscratch1);
          }
          tmp_off = 0;
          sp_offset += st_off;
        }

        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals are
        // addressed with negative offsets, so the LSW ends up at the lower address.

        // this can be a misaligned move
        __ ldrd(rscratch1, rscratch2, Address(loadCounter, 2 * next_off, Address::pre));
        __ strd(rscratch1, rscratch2, Address(sp, tmp_off));
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != loadCounter, "loadCounter is reloaded");
      if (r_2->is_valid()) {
        assert(r_2->as_Register() != loadCounter, "loadCounter is reloaded");
        // this can be a misaligned move
        // ldrd can handle inconsecutive registers
        __ ldrd(r, r_2->as_Register(), Address(loadCounter, 2 * next_off, Address::pre));
      } else {
        __ ldr(r, Address(loadCounter, next_off, Address::pre));
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (!r_2->is_valid()) {
        // Can't do pre or post addressing for vldr, vstr
        __ add(loadCounter, loadCounter, next_off);
        __ vldr_f32(r_1->as_FloatRegister(), Address(loadCounter));
      } else {
        // TODO assert(r_2->is_FloatRegister() && r_2->as_FloatRegister() == r_1->as_FloatRegister() + 1, "");
        // Can't do pre or post addressing for vldr, vstr
        __ add(loadCounter, loadCounter, 2 * next_off);
        __ vldr_f64(r_1->as_FloatRegister(), Address(loadCounter));
      }
    }
  }

  // restore sp
  if (sp_offset) {
    if (__ is_valid_for_imm12(sp_offset)) {
      __ sub(sp, sp, sp_offset);
    } else {
      // sub only takes an encoded imm12, so materialize the value in a register first
      __ mov(rscratch1, sp_offset);
      __ sub(sp, sp, rscratch1);
    }
  }

  if (total_args_passed) {
    // restore loadCounter
    __ ldr(loadCounter, Address(sp, -wordSize));
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.

  __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));

  // Will jump to the compiled code just as if compiled code was doing it.
  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
  __ b(rscratch1);
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;

  Label ok;

  Register holder = rscratch2;
  Register receiver = j_rarg0;
  Register tmp = r8;  // A call-clobbered register not used for arg passing

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rmethod holds the Method* during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  {
    __ block_comment("c2i_unverified_entry {");
    __ load_klass(rscratch1, receiver);
    __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
    __ cmp(rscratch1, tmp);
    __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
    __ b(ok, Assembler::EQ);
    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
    __ cbz(rscratch1, skip_fixup);
    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
    __ block_comment("} c2i_unverified_entry");
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
                                         VMRegPair *regs2,
                                         int total_args_passed) {
  assert(regs2 == NULL, "not needed on AArch32");

  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.

    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
      c_rarg0, c_rarg1, c_rarg2, c_rarg3
    };
#ifdef HARD_FLOAT_CC
    const int FP_ArgReg_N = 16;
    static const FloatRegister FP_ArgReg[] = {
      f0, f1, f2, f3,
      f4, f5, f6, f7,
      f8, f9, f10, f11,
      f12, f13, f14, f15,
    };
    unsigned long fp_free_mask = (1 << FP_ArgReg_N) - 1;
    uint fp_args = 0;
#endif //HARD_FLOAT_CC

    uint int_args = 0;
    uint stk_args = 0;

    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
#ifndef HARD_FLOAT_CC
      // soft FP case
      case T_FLOAT:
#endif
        if (int_args < Argument::n_int_register_parameters_c) {
          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 1;
        }
        break;
#ifndef HARD_FLOAT_CC
      // soft FP case
      case T_DOUBLE:
#endif
      case T_LONG:
        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
        if (int_args + 1 < Argument::n_int_register_parameters_c) {
          if ((int_args % 2) != 0) {
            ++int_args;
          }
          regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
          int_args += 2;
        } else {
          if (stk_args % 2 != 0) {
            ++stk_args;
          }
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
          int_args = Argument::n_int_register_parameters_c;
        }
        break;
#ifdef HARD_FLOAT_CC
      case T_FLOAT:
        if (fp_free_mask & ((1 << FP_ArgReg_N)-1)) {
          unsigned index = __builtin_ctz(fp_free_mask);
          regs[i].set1(FP_ArgReg[index]->as_VMReg());
          fp_free_mask &= ~(1 << index);
          fp_args += 2 * ((~index) & 1);
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 1;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
        if (fp_args + 1 < FP_ArgReg_N) {
          fp_free_mask &= ~(3 << fp_args);
          regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
          fp_args += 2;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
#endif //HARD_FLOAT_CC
      case T_VOID: // Halves of longs and doubles
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }

  return align_up(stk_args, StackAlignmentInBytes/wordSize);
}
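
// Worked example (illustrative only, HARD_FLOAT_CC path): for C arguments
// (float, double, float) the mapping above yields f0 for the first float,
// f2:f3 (d1) for the double, and then f1 for the second float, because
// fp_free_mask still has bit 1 set and __builtin_ctz picks it up, which
// implements the AAPCS-VFP back-filling rule. No stack slots are used.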

// On 64-bit platforms we store integer-like items to the stack as 64-bit
// items (SPARC ABI) even though Java only stores 32 bits for such a
// parameter. On 32-bit platforms it is simply 32 bits, so this routine
// does a 32->32 copy on 32-bit and would do 32->64 on 64-bit.

static void move_int(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
      __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
  } else {
    if (dst.first() != src.first()) {
      __ mov(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}

// An oop arg. Must pass a handle, not the oop itself.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register();

  // See if the oop is NULL; if it is, we need no handle

  if (src.first()->is_stack()) {

    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
    __ lea(rHandle, Address(rfp, reg2offset_in(src.first())));
    // conditionally move a NULL
    __ cmp(rscratch1, 0);
    __ mov(rHandle, 0, Assembler::EQ);
  } else {

    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if the oop is non-NULL

    const Register rOop = src.first()->as_Register();
    int oop_slot;
    if (rOop == j_rarg0)
      oop_slot = 0;
    else if (rOop == j_rarg1)
      oop_slot = 1;
    else if (rOop == j_rarg2)
      oop_slot = 2;
    else {
      assert(rOop == j_rarg3, "wrong register");
      oop_slot = 3;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot * VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be NULL
    __ str(rOop, Address(sp, offset));
    if (is_receiver) {
      *receiver_offset = offset;
    }

    __ cmp(rOop, 0);
    __ lea(rHandle, Address(sp, offset));
    // conditionally move a NULL
    __ mov(rHandle, 0, Assembler::EQ);
  }

  // If arg is on the stack then place it, otherwise it is already in the correct reg.
  if (dst.first()->is_stack()) {
    __ str(rHandle, Address(sp, reg2offset_out(dst.first())));
  }
}

// A float arg may have to be moved between a float register and an int register.
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (hasFPU()) {
    if (src.first()->is_stack()) {
      if (dst.first()->is_stack()) {
        // stack to stack
        // Have no vfp scratch registers, so copy via gpr
        __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first())));
        __ str(rscratch1, Address(sp, reg2offset_out(dst.first())));
      } else {
        // stack to reg
        __ vldr_f32(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
      }
    } else if (dst.first()->is_stack()) {
      // reg to stack
      __ vstr_f32(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
    } else {
#ifndef HARD_FLOAT_CC
      if (dst.first()->is_Register()) {
        __ vmov_f32(dst.first()->as_Register(), src.first()->as_FloatRegister());
      } else
#endif
      if (dst.first() != src.first()) {
        __ vmov_f32(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
      }
    }
  } else {
    move_int(masm, src, dst);
  }
}

// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first())));
      __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first())));
    } else {
      // stack to reg
      __ ldrd(dst.first()->as_Register(), dst.second()->as_Register(),
              Address(rfp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ strd(src.first()->as_Register(), src.second()->as_Register(),
            Address(sp, reg2offset_out(dst.first())));
  } else {
    // reg to reg
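    // Order the two moves so that src.second() is read before the write to
    // dst.first() could clobber it when those registers alias.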
    if (dst.first() != src.first()) {
      if (dst.first() != src.second()) {
        __ mov(dst.first()->as_Register(), src.first()->as_Register());
        __ mov(dst.second()->as_Register(), src.second()->as_Register());
      } else {
        __ mov(dst.second()->as_Register(), src.second()->as_Register());
        __ mov(dst.first()->as_Register(), src.first()->as_Register());
      }
    }
  }
}

// A double move
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (hasFPU()) {
    if (src.first()->is_stack()) {
      if (dst.first()->is_stack()) {
        // stack to stack
        // Have no vfp scratch registers, so copy via gpr
        __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first())));
        __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first())));
      } else {
        // stack to reg
        __ vldr_f64(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first())));
      }
    } else if (dst.first()->is_stack()) {
      // reg to stack
      __ vstr_f64(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first())));
    } else {
#ifndef HARD_FLOAT_CC
      if (dst.first()->is_Register()) {
        __ vmov_f64(dst.first()->as_Register(), dst.second()->as_Register(), src.first()->as_FloatRegister());
      } else
#endif
      if (dst.first() != src.first()) {
        __ vmov_f64(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
      }
    }
  } else {
    long_move(masm, src, dst);
  }
}


void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_DOUBLE:
#ifdef HARD_FLOAT_CC
    __ vstr_f64(d0, Address(rfp, -(frame::get_frame_size() + 1) * wordSize));
    break;
#endif // fall through otherwise
  case T_LONG:
    __ strd(r0, r1, Address(rfp, -(frame::get_frame_size() + 1) * wordSize));
    break;
  case T_VOID:
    break;
  case T_FLOAT:
#ifdef HARD_FLOAT_CC
    __ vstr_f32(f0, Address(rfp, -frame::get_frame_size() * wordSize));
    break;
#endif // fall through otherwise
  default:
    __ str(r0, Address(rfp, -frame::get_frame_size() * wordSize));
    break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below frame pointer
  // which by this time is free to use
  switch (ret_type) {
  case T_DOUBLE:
#ifdef HARD_FLOAT_CC
    __ vldr_f64(d0, Address(rfp, -(frame::get_frame_size() + 1) * wordSize));
    break;
#endif // fall through otherwise
  case T_LONG:
    __ ldrd(r0, r1, Address(rfp, -(frame::get_frame_size() + 1) * wordSize));
    break;
  case T_VOID:
    break;
  case T_FLOAT:
#ifdef HARD_FLOAT_CC
    __ vldr_f32(f0, Address(rfp, -frame::get_frame_size() * wordSize));
    break;
#endif // fall through otherwise
  default:
    __ ldr(r0, Address(rfp, -frame::get_frame_size() * wordSize));
    break;
  }
}

static int save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  RegSet x;
  int saved_slots = 0;
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      x = x + args[i].first()->as_Register();
      ++saved_slots;
    }
    if (args[i].second()->is_Register()) {
      x = x + args[i].second()->as_Register();
      ++saved_slots;
    }
#ifdef HARD_FLOAT_CC
    else if (args[i].first()->is_FloatRegister()) {
      FloatRegister fr = args[i].first()->as_FloatRegister();

      if (args[i].second()->is_FloatRegister()) {
        assert(args[i].is_single_phys_reg(), "doubles should be 2 consecutive float regs");
        __ decrement(sp, 2 * wordSize);
        __ vstr_f64(fr, Address(sp));
        saved_slots += 2;
      } else {
        __ decrement(sp, wordSize);
        __ vstr_f32(fr, Address(sp));
        ++saved_slots;
      }
    }
#endif //HARD_FLOAT_CC
  }
  __ push(x, sp);
  return saved_slots;
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
  RegSet x;
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      x = x + args[i].first()->as_Register();
    } else {
      ;
    }
    if (args[i].second()->is_Register()) {
      x = x + args[i].second()->as_Register();
    }
  }
  __ pop(x, sp);
  for ( int i = first_arg ; i < arg_count ; i++ ) {
    if (args[i].first()->is_Register()) {
      ;
    }
#ifdef HARD_FLOAT_CC
    else if (args[i].first()->is_FloatRegister()) {
      FloatRegister fr = args[i].first()->as_FloatRegister();

      if (args[i].second()->is_FloatRegister()) {
        assert(args[i].is_single_phys_reg(), "doubles should be 2 consecutive float regs");
        __ vldr_f64(fr, Address(sp));
        __ increment(sp, 2 * wordSize);
      } else {
        __ vldr_f32(fr, Address(sp));
        __ increment(sp, wordSize);
      }
    }
#endif //HARD_FLOAT_CC
  }
}


// Check GCLocker::needs_gc and enter the runtime if it's true.  This
// keeps a new JNI critical region from starting until a GC has been
// forced.  Save down any oops in registers and describe them in an
// OopMap.
static void check_needs_gc_for_critical_native(MacroAssembler* masm,
                                               int stack_slots,
                                               int total_c_args,
                                               int total_in_args,
                                               int arg_save_area,
                                               OopMapSet* oop_maps,
                                               VMRegPair* in_regs,
                                               BasicType* in_sig_bt) { Unimplemented(); }

// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); }


class ComputeMoveOrder: public StackObj {
  class MoveOperation: public ResourceObj {
    friend class ComputeMoveOrder;
   private:
    VMRegPair        _src;
    VMRegPair        _dst;
    int              _src_index;
    int              _dst_index;
    bool             _processed;
    MoveOperation*  _next;
    MoveOperation*  _prev;

    static int get_id(VMRegPair r) { Unimplemented(); return 0; }

   public:
    MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst):
      _src(src)
    , _dst(dst)
    , _src_index(src_index)
    , _dst_index(dst_index)
    , _processed(false)
    , _next(NULL)
    , _prev(NULL) { Unimplemented(); }

    VMRegPair src() const              { Unimplemented(); return _src; }
    int src_id() const                 { Unimplemented(); return 0; }
    int src_index() const              { Unimplemented(); return 0; }
    VMRegPair dst() const              { Unimplemented(); return _src; }
    void set_dst(int i, VMRegPair dst) { Unimplemented(); }
    int dst_index() const              { Unimplemented(); return 0; }
    int dst_id() const                 { Unimplemented(); return 0; }
    MoveOperation* next() const        { Unimplemented(); return 0; }
    MoveOperation* prev() const        { Unimplemented(); return 0; }
    void set_processed()               { Unimplemented(); }
    bool is_processed() const          { Unimplemented(); return 0; }

    // insert
    void break_cycle(VMRegPair temp_register) { Unimplemented(); }

    void link(GrowableArray<MoveOperation*>& killer) { Unimplemented(); }
  };

 private:
  GrowableArray<MoveOperation*> edges;

 public:
  ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs,
                    BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); }

  // Collected all the move operations
  void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); }

  // Walk the edges breaking cycles between moves.  The result list
  // can be walked in order to produce the proper set of loads
  GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; }
};


static void rt_call(MacroAssembler* masm, address dest) {
  CodeBlob *cb = CodeCache::find_blob(dest);
  if (cb) {
    __ far_call(RuntimeAddress(dest), NULL);
  } else {
    __ lea(rscratch2, RuntimeAddress(dest));
    __ bl(rscratch2);
    __ maybe_isb();
  }
}

static void verify_oop_args(MacroAssembler* masm,
                            const methodHandle &method,
                            const BasicType* sig_bt,
                            const VMRegPair* regs) {
  Register temp_reg = rscratch2;  // not part of any compiled calling seq
  if (VerifyOops) {
    for (int i = 0; i < method->size_of_parameters(); i++) {
      if (sig_bt[i] == T_OBJECT ||
          sig_bt[i] == T_ARRAY) {
        VMReg r = regs[i].first();
        assert(r->is_valid(), "bad oop arg");
        if (r->is_stack()) {
          __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
          __ verify_oop(temp_reg);
        } else {
          __ verify_oop(r->as_Register());
        }
      }
    }
  }
}

static void gen_special_dispatch(MacroAssembler* masm,
                                 const methodHandle &method,
                                 const BasicType* sig_bt,
                                 const VMRegPair* regs) {
  verify_oop_args(masm, method, sig_bt, regs);
  vmIntrinsics::ID iid = method->intrinsic_id();

  // Now write the args into the outgoing interpreter space
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
  if (ref_kind != 0) {
    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
    member_reg = r4;
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (iid == vmIntrinsics::_invokeBasic) {
    has_receiver = true;
  } else {
    fatal("unexpected intrinsic id %d", iid);
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
    VMReg r = regs[member_arg_pos].first();
    if (r->is_stack()) {
      __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
    } else {
      // no data motion is needed
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(method->size_of_parameters() > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");
    if (r->is_stack()) {
      // Porting note:  This assumes that compiled calling conventions always
      // pass the receiver oop in a register.  If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      fatal("receiver always in a register");
    } else {
      // no data motion is needed
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
}
1368 
1369 // ---------------------------------------------------------------------------
1370 // Generate a native wrapper for a given method.  The method takes arguments
1371 // in the Java compiled code convention, marshals them to the native
1372 // convention (handlizes oops, etc), transitions to native, makes the call,
1373 // returns to java state (possibly blocking), unhandlizes any result and
1374 // returns.
1375 //
1376 // Critical native functions are a shorthand for the use of
1377 // GetPrimitiveArrayCritical and disallow the use of any other JNI
1378 // functions.  The wrapper is expected to unpack the arguments before
1379 // passing them to the callee and perform checks before and after the
1380 // native call to ensure that the GC_locker
1381 // lock_critical/unlock_critical semantics are followed.  Some other
1382 // parts of JNI setup are skipped, like the tear down of the JNI handle
1383 // block and the check for pending exceptions, since it's impossible
1384 // for them to be thrown.
1385 //
1386 // They are roughly structured like this:
1387 //    if (GC_locker::needs_gc())
1388 //      SharedRuntime::block_for_jni_critical();
1389 //    transition to thread_in_native
1390 //    unpack array arguments and call native entry point
1391 //    check for safepoint in progress
1392 //    check if any thread suspend flags are set
1393 //      call into JVM and possibly unlock the JNI critical
1394 //      if a GC was suppressed while in the critical native.
1395 //    transition back to thread_in_Java
1396 //    return to caller
1397 //
1398 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1399                                                 const methodHandle& method,
1400                                                 int compile_id,
1401                                                 BasicType* in_sig_bt,
1402                                                 VMRegPair* in_regs,
1403                                                 BasicType ret_type) {
1404   if (method->is_method_handle_intrinsic()) {
1405     vmIntrinsics::ID iid = method->intrinsic_id();
1406     intptr_t start = (intptr_t)__ pc();
1407     int vep_offset = ((intptr_t)__ pc()) - start;
1408 
1409     // First instruction must be a nop as it may need to be patched on deoptimisation
1410     __ nop();
1411     gen_special_dispatch(masm,
1412                          method,
1413                          in_sig_bt,
1414                          in_regs);
1415     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1416     __ flush();
1417     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1418     return nmethod::new_native_nmethod(method,
1419                                        compile_id,
1420                                        masm->code(),
1421                                        vep_offset,
1422                                        frame_complete,
1423                                        stack_slots / VMRegImpl::slots_per_word,
1424                                        in_ByteSize(-1),
1425                                        in_ByteSize(-1),
1426                                        (OopMapSet*)NULL);
1427   }
1428 
1429   bool is_critical_native = true;
1430   address native_func = method->critical_native_function();
1431   if (native_func == NULL) {
1432     native_func = method->native_function();
1433     is_critical_native = false;
1434   }
1435   assert(native_func != NULL, "must have function");
1436 
1437   // An OopMap for lock (and class if static)
1438   OopMapSet *oop_maps = new OopMapSet();
1439   intptr_t start = (intptr_t)__ pc();
1440 
1441   // We have received a description of where all the java args are located
1442   // on entry to the wrapper. We need to convert these args to where
1443   // the jni function will expect them. To figure out where they go
1444   // we convert the java signature to a C signature by inserting
1445   // the hidden arguments as arg[0] and possibly arg[1] (static method)
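       //
       // Illustrative example (not part of the generated code): for a non-static
       //   native int foo(long l)
       // the incoming Java signature is (T_OBJECT /* receiver */, T_LONG, T_VOID)
       // and the C signature becomes (T_ADDRESS /* JNIEnv* */, T_OBJECT, T_LONG, T_VOID),
       // while a static method has no receiver but gains a T_OBJECT for the class
       // mirror as arg[1].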
1446 
1447   const int total_in_args = method->size_of_parameters();
1448   int total_c_args = total_in_args;
1449   if (!is_critical_native) {
1450     total_c_args += 1;
1451     if (method->is_static()) {
1452       total_c_args++;
1453     }
1454   } else {
1455     for (int i = 0; i < total_in_args; i++) {
1456       if (in_sig_bt[i] == T_ARRAY) {
1457         total_c_args++;
1458       }
1459     }
1460   }
1461 
1462   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1463   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1464   BasicType* in_elem_bt = NULL;
1465 
1466   int argc = 0;
1467   if (!is_critical_native) {
1468     out_sig_bt[argc++] = T_ADDRESS;
1469     if (method->is_static()) {
1470       out_sig_bt[argc++] = T_OBJECT;
1471     }
1472 
1473     for (int i = 0; i < total_in_args ; i++ ) {
1474       out_sig_bt[argc++] = in_sig_bt[i];
1475     }
1476   } else {
1477     Thread* THREAD = Thread::current();
1478     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1479     SignatureStream ss(method->signature());
1480     for (int i = 0; i < total_in_args ; i++ ) {
1481       if (in_sig_bt[i] == T_ARRAY) {
1482         // Arrays are passed as int, elem* pair
1483         out_sig_bt[argc++] = T_INT;
1484         out_sig_bt[argc++] = T_ADDRESS;
1485         Symbol* atype = ss.as_symbol(CHECK_NULL);
1486         const char* at = atype->as_C_string();
1487         if (strlen(at) == 2) {
1488           assert(at[0] == '[', "must be");
1489           switch (at[1]) {
1490             case 'B': in_elem_bt[i]  = T_BYTE; break;
1491             case 'C': in_elem_bt[i]  = T_CHAR; break;
1492             case 'D': in_elem_bt[i]  = T_DOUBLE; break;
1493             case 'F': in_elem_bt[i]  = T_FLOAT; break;
1494             case 'I': in_elem_bt[i]  = T_INT; break;
1495             case 'J': in_elem_bt[i]  = T_LONG; break;
1496             case 'S': in_elem_bt[i]  = T_SHORT; break;
1497             case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
1498             default: ShouldNotReachHere();
1499           }
1500         }
1501       } else {
1502         out_sig_bt[argc++] = in_sig_bt[i];
1503         in_elem_bt[i] = T_VOID;
1504       }
1505       if (in_sig_bt[i] != T_VOID) {
1506         assert(in_sig_bt[i] == ss.type(), "must match");
1507         ss.next();
1508       }
1509     }
1510   }
1511 
1512   // Now figure out where the args must be stored and how much stack space
1513   // they require.
1514   int out_arg_slots;
1515   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1516 
1517   // Compute framesize for the wrapper.  We need to handlize all oops in
1518   // incoming registers
1519 
1520   // Calculate the total number of stack slots we will need.
1521 
1522   // First count the abi requirement plus all of the outgoing args
1523   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1524 
1525   // Now the space for the inbound oop handle area
1526   int total_save_slots = -1;
1527   if (is_critical_native) {
1528     // Critical natives may have to call out so they need a save area
1529     // for register arguments.
1530     int double_slots = 0;
1531     int single_slots = 0;
1532     for ( int i = 0; i < total_in_args; i++) {
1533       if (in_regs[i].first()->is_Register()) {
1534         const Register reg = in_regs[i].first()->as_Register();
1535         switch (in_sig_bt[i]) {
1536           case T_ARRAY:  // critical array (uses 2 slots on LP64)
1537           case T_BOOLEAN:
1538           case T_BYTE:
1539           case T_SHORT:
1540           case T_CHAR:
1541           case T_INT:  single_slots++; break;
1542           case T_LONG: double_slots++; break;
1543           default:  ShouldNotReachHere();
1544         }
1545       } else
1546 #ifdef HARD_FLOAT_CC
1547           if (in_regs[i].first()->is_FloatRegister())
1548 #endif // HARD_FLOAT_CC
1549             ShouldNotReachHere();
1550     }
1551     total_save_slots = double_slots * 2 + single_slots;
1552     // align the save area
1553     if (double_slots != 0) {
1554       stack_slots = align_up(stack_slots, 2);
1555     }
1556   } else {
1557     total_save_slots = 4 * VMRegImpl::slots_per_word;  // 4 arguments passed in registers
1558   }
1559   assert(total_save_slots != -1, "initialize total_save_slots!");
1560 
1561   int oop_handle_offset = stack_slots;
1562   stack_slots += total_save_slots;
1563 
1564   // Now any space we need for handlizing a klass if static method
1565 
1566   int klass_slot_offset = 0;
1567   int klass_offset = -1;
1568   int lock_slot_offset = 0;
1569   bool is_static = false;
1570 
1571   if (method->is_static()) {
1572     klass_slot_offset = stack_slots;
1573     stack_slots += VMRegImpl::slots_per_word;
1574     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1575     is_static = true;
1576   }
1577 
1578   // Plus a lock if needed
1579 
1580   if (method->is_synchronized()) {
1581     lock_slot_offset = stack_slots;
1582     stack_slots += VMRegImpl::slots_per_word;
1583   }
1584 
1585   const int enter_frame_size = frame::get_frame_size();
1586 
1587   // Now a place (+2) to save return values or temps during shuffling
1588   // + {2,4} words which are pushed by enter()
1589   // (return address (which we own), saved rfp, ...)
1590   stack_slots += 2 + enter_frame_size;
1591 
1592   // Ok The space we have allocated will look like:
1593   //
1594   //
1595   // FP-> | saved lr            |
1596   //      |---------------------|
1597   //      | saved fp            |
1598   //      |---------------------|
1599   //      | 2 slots for moves   |
1600   //      |.....................|
1601   //      | 1 slot opt padding  |
1602   //      |---------------------|
1603   //      | lock box (if sync)  |
1604   //      |---------------------| <- lock_slot_offset
1605   //      | klass (if static)   |
1606   //      |---------------------| <- klass_slot_offset
1607   //      | oopHandle area      |
1608   //      |---------------------| <- oop_handle_offset (4 java arg registers)
1609   //      | outbound memory     |
1610   //      | based arguments     |
1611   //      |                     |
1612   //      |---------------------|
1613   //      |                     |
1614   // SP-> | out_preserved_slots |
1615   //
1616   //
1617 
1618 
1619   // Now compute the actual number of stack words we need, rounding to keep
1620   // the stack properly aligned.
1621   stack_slots = align_up(stack_slots, StackAlignmentInSlots);
1622 
1623   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1624 
1625   // First thing make an ic check to see if we should even be here
1626 
1627   // We are free to use all registers as temps without saving them and
1628   // restoring them except rfp. rfp is the only callee save register
1629   // as far as the interpreter and the compiler(s) are concerned.
1630 
1631 
1632   const Register ic_reg = rscratch2;
1633   const Register receiver = j_rarg0;
1634 
1635   Label hit;
1636   Label exception_pending;
1637 
1638   assert_different_registers(ic_reg, receiver, rscratch1);
1639   __ verify_oop(receiver);
1640   __ cmp_klass(receiver, ic_reg, rscratch1);
1641   __ b(hit, Assembler::EQ);
1642 
1643   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1644 
1645   // Verified entry point must be aligned
1646   __ align(8);
1647 
1648   __ bind(hit);
1649 
1650 #ifdef ASSERT
1651   __ mov(ic_reg, 0xdead); // trash ic_reg (rscratch2), as it is used as a real scratch register below
1652 #endif
1653 
1654   int vep_offset = ((intptr_t)__ pc()) - start;
1655 
1656   // Generate stack overflow check
1657 
1658   // If we have to make this method not-entrant we'll overwrite its
1659   // first instruction with a jump.  For this action to be legal we
1660   // must ensure that this first instruction is a B, BL, NOP, BKPT,
1661   // SVC, HVC, or SMC.  Make it a NOP.
1662   __ nop();
1663 
1664   if (UseStackBanging) {
1665     __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size());
1666   } else {
1667     Unimplemented();
1668   }
1669 
1670   // Generate a new frame for the wrapper.
1671   __ enter();
1672   // some words were already pushed by enter(), so subtract them from the frame size
1673   __ sub(sp, sp, stack_size - enter_frame_size * wordSize);
1674 
1675   // Frame is now completed as far as size and linkage.
1676   int frame_complete = ((intptr_t)__ pc()) - start;
1677 
1678   if (is_critical_native) {
1679     check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
1680                                        oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1681   }
1682 
1683   //
1684   // We immediately shuffle the arguments so that for any vm call we have to
1685   // make from here on out (sync slow path, jvmti, etc.) we will have
1686   // captured the oops from our caller and have a valid oopMap for
1687   // them.
1688 
1689   // -----------------
1690   // The Grand Shuffle
1691 
1692   // The Java calling convention is either equal (linux) or denser (win64) than the
1693   // c calling convention. However, because of the jni_env argument the c calling
1694   // convention always has at least one more (and two for static) arguments than Java.
1695   // Therefore if we move the args from java -> c backwards then we will never have
1696   // a register->register conflict and we don't have to build a dependency graph
1697   // and figure out how to break any cycles.
1698   //
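       // Illustrative sketch (register names hypothetical): if the Java args sit
       // in registers A, B, C and the corresponding C args (shifted by one for the
       // JNIEnv* slot) are B, C, D, then moving backwards (C -> D, B -> C, A -> B)
       // never overwrites a value that is still waiting to be moved, whereas a
       // forward pass would clobber B before it had been copied into C.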
1699 
1700   // Record sp-based slot for receiver on stack for non-static methods
1701   int receiver_offset = -1;
1702 
1703   // This is a trick. We double the stack slots so we can claim
1704   // the oops in the caller's frame. Since we are sure to have
1705   // more args than the caller, doubling is enough to make
1706   // sure we can capture all the incoming oop args from the
1707   // caller.
1708   //
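       // (In such a map, slot indices at or above stack_slots can then refer to
       // incoming stack arguments that still live in the caller's frame; doubling
       // the slot count keeps those indices representable.)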
1709   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1710 
1711   // Mark location of rfp (someday)
1712   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp));
1713 
1714 
1715 #ifdef ASSERT
1716   bool reg_destroyed[RegisterImpl::number_of_registers];
1717   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1718   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1719     reg_destroyed[r] = false;
1720   }
1721   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
1722     freg_destroyed[f] = false;
1723   }
1724 
1725 #endif // ASSERT
1726 
1727   // This may iterate in two different directions depending on the
1728   // kind of native it is.  The reason is that for regular JNI natives
1729   // the incoming and outgoing registers are offset upwards and for
1730   // critical natives they are offset down.
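       // For regular JNI natives the loop below just visits (i, c_arg) pairs from
       // the highest index downwards.  For critical natives, ComputeMoveOrder emits
       // the pairs in a safe order and may use the sentinels c_arg == -1 ("park the
       // value in tmp_vmreg") and i == -1 ("take the value back from tmp_vmreg") to
       // break a cycle; this is what the -1 handling in the loop implements.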
1731   GrowableArray<int> arg_order(2 * total_in_args);
1732   VMRegPair tmp_vmreg;
1733   tmp_vmreg.set2(rscratch2->as_VMReg());
1734 
1735   if (!is_critical_native) {
1736     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1737       arg_order.push(i);
1738       arg_order.push(c_arg);
1739     }
1740   } else {
1741     // Compute a valid move order, using tmp_vmreg to break any cycles
1742     ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
1743   }
1744 
1745   int temploc = -1;
1746   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1747     int i = arg_order.at(ai);
1748     int c_arg = arg_order.at(ai + 1);
1749     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1750     if (c_arg == -1) {
1751       assert(is_critical_native, "should only be required for critical natives");
1752       // This arg needs to be moved to a temporary
1753       __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
1754       in_regs[i] = tmp_vmreg;
1755       temploc = i;
1756       continue;
1757     } else if (i == -1) {
1758       assert(is_critical_native, "should only be required for critical natives");
1759       // Read from the temporary location
1760       assert(temploc != -1, "must be valid");
1761       i = temploc;
1762       temploc = -1;
1763     }
1764 #ifdef ASSERT
1765     if (in_regs[i].first()->is_Register()) {
1766       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1767     } else if (in_regs[i].first()->is_FloatRegister()) {
1768       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1769     }
1770     if (out_regs[c_arg].first()->is_Register()) {
1771       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1772     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1773       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1774     }
1775 #endif // ASSERT
1776     switch (in_sig_bt[i]) {
1777       case T_ARRAY:
1778         if (is_critical_native) {
1779           unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1780           c_arg++;
1781 #ifdef ASSERT
1782           if (out_regs[c_arg].first()->is_Register()) {
1783             reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1784           } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1785             freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1786           }
1787 #endif
1788           break;
1789         }
1790       case T_OBJECT:
1791         assert(!is_critical_native, "no oop arguments");
1792         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1793                     ((i == 0) && (!is_static)),
1794                     &receiver_offset);
1795         break;
1796       case T_VOID:
1797         break;
1798 
1799       case T_FLOAT:
1800         float_move(masm, in_regs[i], out_regs[c_arg]);
1801         break;
1802 
1803       case T_DOUBLE:
1804         assert( i + 1 < total_in_args &&
1805                 in_sig_bt[i + 1] == T_VOID &&
1806                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1807         double_move(masm, in_regs[i], out_regs[c_arg]);
1808         break;
1809 
1810       case T_LONG :
1811         long_move(masm, in_regs[i], out_regs[c_arg]);
1812         break;
1813 
1814       case T_BOOLEAN :
1815       case T_BYTE :
1816       case T_CHAR :
1817       case T_SHORT :
1818       case T_INT :
1819         move_int(masm, in_regs[i], out_regs[c_arg]);
1820         break;
1821 
1822       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1823       case T_NARROWOOP :
1824       case T_METADATA :
1825       case T_NARROWKLASS :
1826       default:
1827         ShouldNotReachHere();
1828     }
1829   }
1830 
1831   // point c_arg at the first arg that is already loaded in case we
1832   // need to spill before we call out
1833   int c_arg = total_c_args - total_in_args;
1834 
1835   // We use r4 as the oop handle for the receiver/klass
1836   // It is callee save so it survives the call to native
1837 
1838   const Register oop_handle_reg = r4;
1839 
1840   // Pre-load a static method's oop.  Used both by locking code and
1841   // the normal JNI call code.
1842   if (method->is_static() && !is_critical_native) {
1843 
1844     //  load oop into a register
1845     __ movoop(oop_handle_reg,
1846               JNIHandles::make_local(method->method_holder()->java_mirror()),
1847               /*immediate*/true);
1848 
1849     // Now handlize the static class mirror; it's known not-null.
1850     __ str(oop_handle_reg, Address(sp, klass_offset));
1851     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1852 
1853     // Now get the handle
1854     __ lea(oop_handle_reg, Address(sp, klass_offset));
1855     // store the klass handle as second argument
1856     __ mov(c_rarg1, oop_handle_reg);
1857     // and protect the arg if we must spill
1858     c_arg--;
1859   }
1860 
1861   // Change state to native (we save the return address in the thread, since it might not
1862   // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1863   // points into the right code segment. It does not have to be the correct return pc.
1864   // We use the same pc/oopMap repeatedly when we call out
1865 
1866   intptr_t the_pc = (intptr_t) __ pc();
1867   oop_maps->add_gc_map(the_pc - start, map);
1868 
1869   __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1);
1870 
1871 
1872   // We have all of the arguments set up at this point. We must not touch any
1873   // argument registers from here on (there is no oopMap describing them if we were to save/restore them).
1874 
1875 #ifdef DTRACE_ENABLED
1876   {
1877     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1878     // protect the args we've loaded
1879     (void) save_args(masm, total_c_args, c_arg, out_regs);
1880     __ mov_metadata(c_rarg1, method());
1881     __ call_VM_leaf(
1882       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1883       rthread, c_rarg1);
1884     restore_args(masm, total_c_args, c_arg, out_regs);
1885   }
1886 #endif
1887 
1888   // RedefineClasses() tracing support for obsolete method entry
1889   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1890     // protect the args we've loaded
1891     save_args(masm, total_c_args, c_arg, out_regs);
1892     __ mov_metadata(c_rarg1, method());
1893     __ call_VM_leaf(
1894       CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1895       rthread, c_rarg1);
1896     restore_args(masm, total_c_args, c_arg, out_regs);
1897   }
1898 
1899   // Lock a synchronized method
1900 
1901   // Register definitions used by locking and unlocking
1902 
1903   Label slow_path_lock;
1904   Label lock_done;
1905 
1906   if (method->is_synchronized()) {
1907     assert(!is_critical_native, "unhandled");
1908 
1909     // registers below are not used to pass parameters
1910     // and they are caller save in C1
1911     // => safe to use as temporary here
1912     const Register swap_reg = r5;
1913     const Register obj_reg  = r6;  // Will contain the oop
1914     const Register lock_reg = r7;  // Address of compiler lock object (BasicLock)
1915 
1916     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1917 
1918     // Get the handle (the 2nd argument)
1919     __ mov(oop_handle_reg, c_rarg1);
1920 
1921     // Get address of the box
1922 
1923     __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1924 
1925     // Load the oop from the handle
1926     __ ldr(obj_reg, Address(oop_handle_reg, 0));
1927 
1928     if (UseBiasedLocking) {
1929       __ biased_locking_enter(obj_reg, swap_reg, rscratch2, rscratch1, false, lock_done, &slow_path_lock);
1930     }
1931 
1932     // Load (object->mark() | 1) into swap_reg
1933     __ ldr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1934     __ orr(swap_reg, swap_reg, 1);
1935 
1936     // Save (object->mark() | 1) into BasicLock's displaced header
1937     __ str(swap_reg, Address(lock_reg, mark_word_offset));
1938 
1939     // Swap lock_reg into the object header iff it still equals swap_reg; else take the slow path
1940     { Label here;
1941       __ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, lock_done, &slow_path_lock);
1942     }
1943 
1944     // Slow path will re-enter here
1945     __ bind(lock_done);
1946   }
1947 
1948 
1949   // Finally just about ready to make the JNI call
1950 
1951 
1952   // get JNIEnv* which is first argument to native
1953   if (!is_critical_native) {
1954     __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1955   }
1956 
1957   // Now set thread in native
1958   __ mov(rscratch1, _thread_in_native);
1959   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1960   __ dmb(Assembler::ISH);
1961   __ str(rscratch1, rscratch2);
1962 
1963   // Do the call
1964   rt_call(masm, native_func);
1965 
1966   // Unpack native results.
1967   switch (ret_type) {
1968   case T_BOOLEAN: __ c2bool(r0);             break;
1969   case T_CHAR   : __ uxth(r0, r0);           break;
1970   case T_BYTE   : __ sxtb(r0, r0);           break;
1971   case T_SHORT  : __ sxth(r0, r0);           break;
1972   case T_INT    :                            break;
1973   case T_FLOAT  :
1974 #ifndef HARD_FLOAT_CC
1975       if(hasFPU()) {
1976           __ vmov_f32(d0, r0);
1977       }
1978 #endif
1979       break;
1980   case T_DOUBLE :
1981 #ifndef HARD_FLOAT_CC
1982       if(hasFPU()) {
1983           __ vmov_f64(d0, r0, r1);
1984       }
1985 #endif
1986       break;
1987   case T_ARRAY:                 // Really a handle
1988   case T_OBJECT:                // Really a handle
1989       break; // can't de-handlize until after safepoint check
1990   case T_VOID: break;
1991   case T_LONG: break;
1992   default       : ShouldNotReachHere();
1993   }
1994 
1995   // Switch thread to "native transition" state before reading the synchronization state.
1996   // This additional state is necessary because reading and testing the synchronization
1997   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1998   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1999   //     VM thread changes sync state to synchronizing and suspends threads for GC.
2000   //     Thread A is resumed to finish this native method, but doesn't block here since it
2001   //     didn't see any synchronization in progress, and escapes.
2002   __ mov(rscratch1, _thread_in_native_trans);
2003   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
2004   __ dmb(Assembler::ISH);
2005   __ str(rscratch1, rscratch2);
2006 
2007   if(os::is_MP()) {
2008     if (UseMembar) {
2009       // Force this write out before the read below
2010       __ membar(Assembler::AnyAny);
2011     } else {
2012       // Write serialization page so VM thread can do a pseudo remote membar.
2013       // We use the current thread pointer to calculate a thread specific
2014       // offset to write to within the page. This minimizes bus traffic
2015       // due to cache line collision.
2016       __ serialize_memory(rthread, rscratch1);
2017     }
2018   }
2019 
2020   Label after_transition;
2021 
2022   // check for safepoint operation in progress and/or pending suspend requests
2023   {
2024     Label Continue;
2025 
2026     Label L;
2027     __ safepoint_poll_acquire(L);
2028     __ ldr(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
2029     __ cbz(rscratch1, Continue);
2030     __ bind(L);
2031 
2032     // Don't use call_VM as it will see a possible pending exception and forward it
2033     // and never return here preventing us from clearing _last_native_pc down below.
2034     //
2035     save_native_result(masm, ret_type, stack_slots);
2036     __ mov(c_rarg0, rthread);
2037 #ifndef PRODUCT
2038     assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
2039 #endif
2040     if (!is_critical_native) {
2041       __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2042     } else {
2043       __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
2044     }
2045     __ bl(rscratch1);
2046     __ maybe_isb();
2047     // Restore any method result value
2048     restore_native_result(masm, ret_type, stack_slots);
2049 
2050     if (is_critical_native) {
2051       // The call above performed the transition to thread_in_Java so
2052       // skip the transition logic below.
2053       __ b(after_transition);
2054     }
2055 
2056     __ bind(Continue);
2057   }
2058 
2059   // change thread state
2060   __ mov(rscratch1, _thread_in_Java);
2061   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
2062   __ dmb(Assembler::ISH);
2063   __ str(rscratch1, rscratch2);
2064   __ bind(after_transition);
2065 
2066   Label reguard;
2067   Label reguard_done;
2068   __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
2069   __ cmp(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
2070   __ b(reguard, Assembler::EQ);
2071   __ bind(reguard_done);
2072 
2073   // native result if any is live
2074 
2075   // Unlock
2076   Label unlock_done;
2077   Label slow_path_unlock;
2078   if (method->is_synchronized()) {
2079     const Register obj_reg  = r2;  // Will contain the oop
2080     const Register lock_reg = rscratch1; // Address of compiler lock object (BasicLock)
2081     const Register old_hdr  = r3;  // value of old header at unlock time
2082 
2083     // Get locked oop from the handle we passed to jni
2084     __ ldr(obj_reg, Address(oop_handle_reg, 0));
2085 
2086     if (UseBiasedLocking) {
2087       __ biased_locking_exit(obj_reg, old_hdr, unlock_done);
2088     }
2089 
2090     // Simple recursive lock?
2091     // get address of the stack lock
2092     __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
2093 
2094     //  get old displaced header
2095     __ ldr(old_hdr, Address(lock_reg, 0));
2096     __ cbz(old_hdr, unlock_done);
2097 
2098     // Atomic swap old header if oop still contains the stack lock
2099     Label succeed;
2100     __ cmpxchg_obj_header(lock_reg, old_hdr, obj_reg, rscratch2, succeed, &slow_path_unlock);
2101     __ bind(succeed);
2102 
2103     // slow path re-enters here
2104     __ bind(unlock_done);
2105   }
2106 
2107 #ifdef DTRACE_ENABLED
2108   {
2109     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
2110     save_native_result(masm, ret_type, stack_slots);
2111     __ mov_metadata(c_rarg1, method());
2112     __ call_VM_leaf(
2113          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2114          rthread, c_rarg1);
2115     restore_native_result(masm, ret_type, stack_slots);
2116   }
2117 #endif
2118 
2119   __ reset_last_Java_frame(false);
2120 
2121   // Unbox oop result, e.g. JNIHandles::resolve result.
2122   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2123     __ resolve_jobject(r0, rthread, rscratch2);
2124   }
2125 
2126   if (CheckJNICalls) {
2127     // clear_pending_jni_exception_check
2128     __ mov(rscratch1, 0);
2129     __ str(rscratch1, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset()));
2130   }
2131 
2132   if (!is_critical_native) {
2133     // reset handle block
2134     __ mov(rscratch1, 0);
2135     __ ldr(r2, Address(rthread, JavaThread::active_handles_offset()));
2136     __ str(rscratch1, Address(r2, JNIHandleBlock::top_offset_in_bytes()));
2137   }
2138 
2139   __ leave();
2140 
2141   if (!is_critical_native) {
2142     // Any exception pending?
2143     __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2144     __ cbnz(rscratch1, exception_pending);
2145   }
2146 
2147   // We're done
2148   __ b(lr);
2149 
2150   // Unexpected paths are out of line and go here
2151 
2152   if (!is_critical_native) {
2153     // forward the exception
2154     __ bind(exception_pending);
2155 
2156     // and forward the exception
2157     __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2158   }
2159 
2160   // Slow path locking & unlocking
2161   if (method->is_synchronized()) {
2162 
2163     // BEGIN Slow path lock
2164     __ bind(slow_path_lock);
2165 
2166     // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2167     // args are (oop obj, BasicLock* lock, JavaThread* thread)
2168 
2169     // protect the args we've loaded
2170     const int extra_words = save_args(masm, total_c_args, c_arg, out_regs);
2171 
2172     __ ldr(c_rarg0, Address(oop_handle_reg));
2173     __ lea(c_rarg1, Address(sp, (extra_words + lock_slot_offset) * VMRegImpl::stack_slot_size));
2174     __ mov(c_rarg2, rthread);
2175 
2176     // Not a leaf but we have last_Java_frame setup as we want
2177     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2178     restore_args(masm, total_c_args, c_arg, out_regs);
2179 
2180 #ifdef ASSERT
2181     { Label L;
2182       __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2183       __ cbz(rscratch1, L);
2184       __ stop("no pending exception allowed on exit from monitorenter");
2185       __ bind(L);
2186     }
2187 #endif
2188     __ b(lock_done);
2189 
2190     // END Slow path lock
2191 
2192     // BEGIN Slow path unlock
2193     __ bind(slow_path_unlock);
2194 
2195     // If we haven't already saved the native result we must save it now, as the
2196     // result registers are still exposed.
2197 
2198     save_native_result(masm, ret_type, stack_slots);
2199 
2200     __ mov(c_rarg2, rthread);
2201     __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
2202     __ ldr(c_rarg0, Address(oop_handle_reg));
2203 
2204     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2205     __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2206     __ mov(rscratch2, 0);
2207     __ str(rscratch2, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2208 
2209     rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2210 
2211 #ifdef ASSERT
2212     {
2213       Label L;
2214       __ ldr(rscratch2, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2215       __ cbz(rscratch2, L);
2216       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2217       __ bind(L);
2218     }
2219 #endif // ASSERT
2220 
2221     __ str(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2222 
2223     restore_native_result(masm, ret_type, stack_slots);
2224 
2225     __ b(unlock_done);
2226 
2227     // END Slow path unlock
2228 
2229   } // synchronized
2230 
2231   // SLOW PATH Reguard the stack if needed
2232 
2233   __ bind(reguard);
2234   save_native_result(masm, ret_type, stack_slots);
2235   rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2236   restore_native_result(masm, ret_type, stack_slots);
2237   // and continue
2238   __ b(reguard_done);
2239 
2240 
2241 
2242   __ flush();
2243 
2244   nmethod *nm = nmethod::new_native_nmethod(method,
2245                                             compile_id,
2246                                             masm->code(),
2247                                             vep_offset,
2248                                             frame_complete,
2249                                             stack_slots / VMRegImpl::slots_per_word,
2250                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2251                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2252                                             oop_maps);
2253 
2254   if (is_critical_native) {
2255     nm->set_lazy_critical_native(true);
2256   }
2257 
2258   return nm;
2259 }
2260 
2261 // this function returns the adjustment (in number of words) to a c2i adapter
2262 // activation for use during deoptimization
2263 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2264   assert(callee_locals >= callee_parameters,
2265           "test and remove; got more parms than locals");
2266   if (callee_locals < callee_parameters)
2267     return 0;                   // No adjustment for negative locals
2268   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2269   // diff is counted in stack words
2270   return align_up(diff, 2);
2271 }
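     // Worked example (illustrative, assuming Interpreter::stackElementWords == 1):
     // callee_parameters == 2 and callee_locals == 5 give diff == 3 stack words,
     // which align_up rounds to an adjustment of 4 words.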
2272 
2273 
2274 //------------------------------generate_deopt_blob----------------------------
2275 void SharedRuntime::generate_deopt_blob() {
2276 
2277   // Allocate space for the code
2278   ResourceMark rm;
2279   // Setup code generation tools
2280   CodeBuffer buffer("deopt_blob", 2048, 1024);
2281   MacroAssembler* masm = new MacroAssembler(&buffer);
2282   int frame_size_in_words;
2283   OopMap* map = NULL;
2284   OopMapSet *oop_maps = new OopMapSet();
2285 
2286   // -------------
2287   // This code enters when returning to a de-optimized nmethod.  A return
2288   // address has been pushed on the stack, and return values are in
2289   // registers.
2290   // If we are doing a normal deopt then we were called from the patched
2291   // nmethod from the point we returned to the nmethod. So the return
2292   // address on the stack is wrong by NativeCall::instruction_size
2293   // We will adjust the value so it looks like we have the original return
2294   // address on the stack (like when we eagerly deoptimized).
2295   // In the case of an exception pending when deoptimizing, we enter
2296   // with a return address on the stack that points after the call we patched
2297   // into the exception handler. We have the following register state from,
2298   // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2299   //    r0: exception oop
2300   //    r7: exception handler
2301   //    r3: throwing pc
2302   // So in this case we simply jam r3 into the useless return address and
2303   // the stack looks just like we want.
2304   //
2305   // At this point we need to de-opt.  We save the argument return
2306   // registers.  We call the first C routine, fetch_unroll_info().  This
2307   // routine captures the return values and returns a structure which
2308   // describes the current frame size and the sizes of all replacement frames.
2309   // The current frame is compiled code and may contain many inlined
2310   // functions, each with their own JVM state.  We pop the current frame, then
2311   // push all the new frames.  Then we call the C routine unpack_frames() to
2312   // populate these frames.  Finally unpack_frames() returns us the new target
2313   // address.  Notice that callee-save registers are BLOWN here; they have
2314   // already been captured in the vframeArray at the time the return PC was
2315   // patched.
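       //
       // Concretely, the blob generated below has several entry points (the normal
       // deopt prolog at 'start', reexecute_offset, and the exception entries); each
       // path loads the matching Deoptimization::Unpack_* value into r7, which is
       // later passed to unpack_frames as its exec_mode argument, before joining
       // the common path at 'cont'.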
2316   address start = __ pc();
2317   Label cont;
2318 
2319   // Prolog for non exception case!
2320 
2321   // Save everything in sight.
2322   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, true);
2323 
2324   // Normal deoptimization.  Save exec mode for unpack_frames.
2325   __ mov(r7, Deoptimization::Unpack_deopt); // callee-saved
2326   __ b(cont);
2327 
2328   int reexecute_offset = __ pc() - start;
2329 
2330   // Reexecute case
2331   // the return address is the pc that describes what bci to re-execute at
2332 
2333   // No need to update map as each call to save_live_registers will produce identical oopmap
2334   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2335 
2336   __ mov(r7, Deoptimization::Unpack_reexecute); // callee-saved
2337   __ b(cont);
2338 
2339   int exception_offset = __ pc() - start;
2340 
2341   // Prolog for exception case
2342 
2343   // all registers are dead at this entry point, except for r0, and
2344   // r3 which contain the exception oop and exception pc
2345   // respectively.  Set them in TLS and fall thru to the
2346   // unpack_with_exception_in_tls entry point.
2347 
2348   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2349   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2350 
2351   int exception_in_tls_offset = __ pc() - start;
2352 
2353   // new implementation because exception oop is now passed in JavaThread
2354 
2355   // Prolog for exception case
2356   // All registers must be preserved because they might be used by LinearScan
2357   // Exception oop and throwing PC are passed in JavaThread
2358   // tos: stack at point of call to method that threw the exception (i.e. only
2359   // args are on the stack, no return address)
2360 
2361   // The return address pushed by save_live_registers will be patched
2362   // later with the throwing pc. The correct value is not available
2363   // now because loading it from memory would destroy registers.
2364 
2365   // NB: The SP at this point must be the SP of the method that is
2366   // being deoptimized.  Deoptimization assumes that the frame created
2367   // here by save_live_registers is immediately below the method's SP.
2368   // This is a somewhat fragile mechanism.
2369 
2370   // Save everything in sight.
2371   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2372 
2373   // Now it is safe to overwrite any register
2374 
2375   // Deopt during an exception.  Save exec mode for unpack_frames.
2376   __ mov(r7, Deoptimization::Unpack_exception); // callee-saved
2377 
2378   // load throwing pc from JavaThread and patch it as the return address
2379   // of the current frame. Then clear the field in JavaThread
2380 
2381   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2382   __ str(r3, Address(rfp, frame::get_return_addr_offset() * wordSize));
2383   __ mov(rscratch1, 0);
2384   __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
2385 
2386 #ifdef ASSERT
2387   // verify that there is really an exception oop in JavaThread
2388   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2389   __ verify_oop(r0);
2390 
2391   // verify that there is no pending exception
2392   Label no_pending_exception;
2393   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2394   __ cbz(rscratch1, no_pending_exception);
2395   __ stop("must not have pending exception here");
2396   __ bind(no_pending_exception);
2397 #endif
2398 
2399   __ bind(cont);
2400 
2401   // Call C code.  Need thread and this frame, but NOT official VM entry
2402   // crud.  We cannot block on this call, no GC can happen.
2403   //
2404   // UnrollBlock* fetch_unroll_info(JavaThread* thread, int exec_mode)
2405 
2406   // fetch_unroll_info needs to call last_java_frame().
2407 
2408   Label retaddr;
2409   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2410 #ifdef ASSERT0
2411   { Label L;
2412     __ ldr(rscratch1, Address(rthread,
2413                               JavaThread::last_Java_fp_offset()));
2414     __ cbz(rscratch1, L);
2415     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2416     __ bind(L);
2417   }
2418 #endif // ASSERT
2419   __ mov(c_rarg0, rthread);
2420   __ mov(c_rarg1, r7); // second arg: exec_mode
2421   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2422   __ bl(rscratch1);
2423   __ bind(retaddr);
2424 
2425   // Need to have an oopmap that tells fetch_unroll_info where to
2426   // find any register it might need.
2427   oop_maps->add_gc_map(__ pc() - start, map);
2428 
2429   __ reset_last_Java_frame(false);
2430 
2431   // Load UnrollBlock* into r5
2432   __ mov(r5, r0);
2433 
2434   Label noException;
2435   __ cmp(r7, Deoptimization::Unpack_exception);   // Was exception pending?
2436   __ b(noException, Assembler::NE);
2437   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2438   // QQQ this is useless it was NULL above
2439   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2440   __ mov(rscratch1, 0);
2441   __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
2442   __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
2443 
2444   __ verify_oop(r0);
2445 
2446   // Overwrite the result registers with the exception results.
2447   __ str(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off)));
2448   // I think this is useless
2449   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2450 
2451   __ bind(noException);
2452 
2453   // Only register save data is on the stack.
2454   // Now restore the result registers.  Everything else is either dead
2455   // or captured in the vframeArray.
2456   RegisterSaver::restore_result_registers(masm);
2457 
2458   // All of the register save area has been popped off the stack. Only the
2459   // return address remains.
2460 
2461   // Pop all the frames we must move/replace.
2462   //
2463   // Frame picture (youngest to oldest)
2464   // 1: self-frame (no frame link)
2465   // 2: deopting frame  (no frame link)
2466   // 3: caller of deopting frame (could be compiled/interpreted).
2467   //
2468   // Note: by leaving the return address of self-frame on the stack
2469   // and using the size of frame 2 to adjust the stack
2470   // when we are done the return to frame 3 will still be on the stack.
2471 
2472   // Pop deoptimized frame
2473   __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2474   __ sub(r2, r2, frame::get_frame_size() * wordSize);
2475   __ add(sp, sp, r2);
2476   if (FrameAPCS) {
2477     // frame constructed with
2478     // push    {r11, r12, lr, pc}
2479     __ ldr(rfp, __ post(sp, 2 * wordSize));
2480     __ ldr(lr,  __ post(sp, 2 * wordSize));
2481   } else {
2482     __ ldrd(rfp, lr, __ post(sp, 2 * wordSize));
2483   }
2484   // LR should now be the return address to the caller (3)
2485 
2486 #ifdef ASSERT
2487   // Compilers generate code that bangs the stack by as much as the
2488   // interpreter would need. So this stack banging should never
2489   // trigger a fault. Verify that it does not on non product builds.
2490   if (UseStackBanging) {
2491     __ ldr(rscratch2, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2492     __ bang_stack_size(rscratch2, r2);
2493   }
2494 #endif
2495   // Load address of array of frame pcs into r2
2496   __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2497 
2498   // Trash the old pc
2499   // __ addptr(sp, wordSize);  FIXME ????
2500 
2501   // Load address of array of frame sizes into r4
2502   __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2503 
2504   // Load counter into r3
2505   __ ldr(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2506 
2507   // Now adjust the caller's stack to make up for the extra locals
2508   // but record the original sp so that we can save it in the skeletal interpreter
2509   // frame and the stack walking of interpreter_sender will get the unextended sp
2510   // value and not the "real" sp value.
2511 
2512   const Register sender_sp = r6;
2513 
2514   __ mov(sender_sp, sp);
2515   __ ldr(rscratch1, Address(r5,
2516                        Deoptimization::UnrollBlock::
2517                        caller_adjustment_offset_in_bytes()));
2518   __ sub(sp, sp, rscratch1);
2519 
2520   // Push interpreter frames in a loop
2521   __ mov(rscratch1, (address)0xDEADDEAD);        // Make a recognizable pattern
2522   // Initially used to place 0xDEADDEAD in rscratch2 as well - why?
2523   __ mov(rscratch2, 0);
2524   Label loop;
2525   __ bind(loop);
2526   __ ldr(rscratch1, Address(__ post(r4, wordSize)));          // Load frame size
2527   __ sub(rscratch1, rscratch1, frame::get_frame_size() * wordSize); // We'll push frame backtrace by hand
2528   __ ldr(lr, Address(__ post(r2, wordSize)));  // Load pc
2529   __ enter();                           // Save old & set new fp
2530   __ sub(sp, sp, rscratch1);                  // Prolog
2531   // This value is corrected by layout_activation_impl
2532   __ str(rscratch2, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
2533   __ str(sender_sp, Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); // Make it walkable
2534   __ mov(sender_sp, sp);               // Pass sender_sp to next frame
2535   __ sub(r3, r3, 1);                   // Decrement counter
2536   __ cbnz(r3, loop);
2537 
2538   // Re-push self-frame
2539   __ ldr(lr, Address(r2));
2540   __ enter();
2541 
2542   // Allocate a full sized register save area.  We subtract frame::get_frame_size() words,
2543   // because enter() just pushed them.
2544   __ sub(sp, sp, (frame_size_in_words - frame::get_frame_size()) * wordSize);
2545 
2546   // Restore frame locals after moving the frame
2547   if(hasFPU()) {
2548     __ vstr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
2549   }
2550   __ strd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off)));
2551 
2552   // Call C code.  Need thread but NOT official VM entry
2553   // crud.  We cannot block on this call, no GC can happen.  Call should
2554   // restore return values to their stack-slots with the new SP.
2555   //
2556   // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
2557 
2558   // Use rfp because the frames look interpreted now
2559   // Don't need the precise return PC here, just precise enough to point into this code blob.
2560   address the_pc = __ pc();
2561   __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
2562 
2563   __ mov(c_rarg0, rthread);
2564   __ mov(c_rarg1, r7); // second arg: exec_mode
2565   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2566   __ bl(rscratch1);
2567 
2568   // Set an oopmap for the call site
2569   // Use the same PC we used for the last java frame
2570   oop_maps->add_gc_map(the_pc - start,
2571                        new OopMap( frame_size_in_words, 0 ));
2572 
2573   // Clear fp AND pc
2574   __ reset_last_Java_frame(true);
2575 
2576   // Collect return values
2577   if(hasFPU()) {
2578     __ vldr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off)));
2579   }
2580   __ ldrd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off)));
2581   // I think this is useless (throwing pc?)
2582   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2583 
2584   // Pop self-frame.
2585   __ leave();                           // Epilog
2586 
2587   // Jump to interpreter
2588   __ b(lr);
2589 
2590   // Make sure all code is generated
2591   masm->flush();
2592 
2593   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2594   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2595 
2596 }
2597 
2598 uint SharedRuntime::out_preserve_stack_slots() {
2599   return 0;
2600 }
2601 
2602 #if COMPILER2_OR_JVMCI
2603 //------------------------------generate_uncommon_trap_blob--------------------
2604 void SharedRuntime::generate_uncommon_trap_blob() {
2605   // Allocate space for the code
2606   ResourceMark rm;
2607   // Setup code generation tools
2608   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2609   MacroAssembler* masm = new MacroAssembler(&buffer);
2610 
2611   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2612 
2613   address start = __ pc();
2614 
2615   // Push self-frame.  We get here with a return address in LR
2616   // and sp should be 16 byte aligned
2617   // push rfp and retaddr by hand
2618   __ enter();
2619   // we don't expect an arg reg save area
2620 #ifndef PRODUCT
2621   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
2622 #endif
2623   // compiler left unloaded_class_index in j_rarg0
2624   __ mov(c_rarg1, j_rarg0);
2625 
2626   // we need to set the past SP to the stack pointer of the stub frame
2627   // and the pc to the address where this runtime call will return
2628   // (although actually any pc in this code blob will do).
2629   Label retaddr;
2630   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2631 
2632   // Call C code.  Need thread but NOT official VM entry
2633   // crud.  We cannot block on this call, no GC can happen.  Call should
2634   // capture callee-saved registers as well as return values.
2635   //
2636   // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode);
2637   //
2638   // n.b. 3 gp args, 0 fp args, integral return type
2639 
2640   __ mov(c_rarg0, rthread);
2641   __ mov(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
2642   __ lea(rscratch1,
2643          RuntimeAddress(CAST_FROM_FN_PTR(address,
2644                                          Deoptimization::uncommon_trap)));
2645   __ bl(rscratch1);
2646   __ bind(retaddr);
2647 
2648   // Set an oopmap for the call site
2649   OopMapSet* oop_maps = new OopMapSet();
2650   OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
2651 
2652   // location of rfp is known implicitly by the frame sender code
2653 
2654   oop_maps->add_gc_map(__ pc() - start, map);
2655 
2656   __ reset_last_Java_frame(false);
2657 
2658   // move UnrollBlock* into r4
2659   __ mov(r4, r0);
2660 
2661   // Pop all the frames we must move/replace.
2662   //
2663   // Frame picture (youngest to oldest)
2664   // 1: self-frame (no frame link)
2665   // 2: deopting frame  (no frame link)
2666   // 3: caller of deopting frame (could be compiled/interpreted).
2667 
2668   // Pop self-frame
2669   __ leave();
2670 
2671   // Pop deoptimized frame (int)
2672   __ ldr(r2, Address(r4,
2673                      Deoptimization::UnrollBlock::
2674                      size_of_deoptimized_frame_offset_in_bytes()));
2675   __ add(sp, sp, r2);
2676 
2677 #ifdef ASSERT
2678   // Compilers generate code that bangs the stack by as much as the
2679   // interpreter would need. So this stack banging should never
2680   // trigger a fault. Verify that it does not on non product builds.
2681   if (UseStackBanging) {
2682     // The compiled method that we are deoptimizing was popped from the stack.
2683     // If the stack bang results in a stack overflow, we don't return to the
2684     // method that is being deoptimized. The stack overflow exception is
2685     // propagated to the caller of the deoptimized method. Need to get the pc
2686     // from the caller in LR and restore FP.
2687     __ ldr(r2, Address(r4,
2688                        Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2689     __ ldr(lr, Address(r2, 0));
2690     __ ldr(rfp, Address(r4, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2691     __ ldr(r1, Address(r4,
2692                        Deoptimization::UnrollBlock::
2693                        total_frame_sizes_offset_in_bytes()));
2694     __ bang_stack_size(r1, r2);
2695   }
2696 #endif
2697   // Now is the time to restore the frame pointer. Take what was in the frame header,
2698   // since it can be the real FP (previous frame interpreted/C1) or an arbitrary value (C2).
2699   __ ldr(rfp, Address(sp, -2*wordSize)/*Address(r4,
2700                     Deoptimization::UnrollBlock::initial_info_offset_in_bytes())*/);
2701 
2702   // Load address of array of frame pcs into r2 (address*)
2703   __ ldr(r2, Address(r4,
2704                      Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2705 
2706   // Load address of array of frame sizes into r5 (intptr_t*)
2707   __ ldr(r5, Address(r4,
2708                      Deoptimization::UnrollBlock::
2709                      frame_sizes_offset_in_bytes()));
2710 
2711   // Counter
2712   __ ldr(r3, Address(r4,
2713                      Deoptimization::UnrollBlock::
2714                      number_of_frames_offset_in_bytes())); // (int)
2715 
2716   // Now adjust the caller's stack to make up for the extra locals but
2717   // record the original sp so that we can save it in the skeletal
2718   // interpreter frame and the stack walking of interpreter_sender
2719   // will get the unextended sp value and not the "real" sp value.
2720 
2721   const Register sender_sp = r7;
2722 
2723   __ mov(sender_sp, sp);
2724   __ ldr(r1, Address(r4,
2725                      Deoptimization::UnrollBlock::
2726                      caller_adjustment_offset_in_bytes())); // (int)
2727   __ sub(sp, sp, r1);
2728 
2729   __ mov(rscratch1, 0);
2730   // Push interpreter frames in a loop
2731   Label loop;
2732   __ bind(loop);
2733   __ ldr(r1, __ post(r5, wordSize));   // Load frame size
2734   __ sub(r1, r1, 2 * wordSize);        // We'll push pc and rfp by hand
2735   __ ldr(lr, __ post(r2, wordSize));   // Save return address
2736   __ enter();                          // and old rfp & set new rfp
2737   __ sub(sp, sp, r1);                  // Prolog
2738   __ str(sender_sp, Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); // Make it walkable
2739   // This value is corrected by layout_activation_impl
2740   __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); //zero it
2741   __ mov(sender_sp, sp);               // Pass sender_sp to next frame
2742   __ subs(r3, r3, 1);                  // Decrement counter
2743   __ b(loop, Assembler::GT);
2744   __ ldr(lr, Address(r2, 0));          // save final return address
2745   // Re-push self-frame
2746   __ enter();                          // & old rfp & set new rfp
2747 
2748   // Use rfp because the frames look interpreted now
2749   // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
2750   // Don't need the precise return PC here, just precise enough to point into this code blob.
2751   address the_pc = __ pc();
2752   __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
2753 
2754   // Call C code.  Need thread but NOT official VM entry
2755   // crud.  We cannot block on this call, no GC can happen.  Call should
2756   // restore return values to their stack-slots with the new SP.
2757   // Thread is passed in c_rarg0 below.
2758   //
2759   // BasicType unpack_frames(JavaThread* thread, int exec_mode);
2760   //
2761   // n.b. 2 gp args, 0 fp args, integral return type
2762 
2763   // sp should already be aligned
2764   __ mov(c_rarg0, rthread);
2765   __ mov(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
2766   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2767   __ bl(rscratch1);
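       // On return the runtime has laid out and filled in the skeletal
       // interpreter frames pushed above (cf. the layout_activation_impl
       // note earlier).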
2768 
2769   // Set an oopmap for the call site
2770   // Use the same PC we used for the last java frame
2771   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
2772 
2773   // Clear fp AND pc
2774   __ reset_last_Java_frame(true);
2775 
2776   // Pop self-frame.
2777   __ leave();                 // Epilog
2778 
2779   // Jump to interpreter
2780   __ b(lr);
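       // After leave() lr again holds the final return address loaded above,
       // so execution continues in the interpreter for the topmost new frame.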
2781 
2782   // Make sure all code is generated
2783   masm->flush();
2784 
2785   _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
2786                                                  SimpleRuntimeFrame::framesize >> 1);
2787 }
2788 #endif // COMPILER2_OR_JVMCI
2789 
2790 
2791 //------------------------------generate_handler_blob------
2792 //
2793 // Generate a special Compile2Runtime blob that saves all registers
2794 // and sets up an oopmap.
2795 //
2796 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2797   ResourceMark rm;
2798   OopMapSet *oop_maps = new OopMapSet();
2799   OopMap* map;
2800 
2801   // Allocate space for the code.  Setup code generation tools.
2802   CodeBuffer buffer("handler_blob", 2048, 1024);
2803   MacroAssembler* masm = new MacroAssembler(&buffer);
2804 
2805   address start   = __ pc();
2806   address call_pc = NULL;
2807   int frame_size_in_words;
2808   bool cause_return = (poll_type == POLL_AT_RETURN);
2809 
2810   // If cause_return is true we are at a poll_return and the return
2811   // address to the caller is already on the stack for the nmethod that
2812   // is at the safepoint. We can leave this return address on the stack
2813   // and effectively complete the return and safepoint in the caller.
2814   // Otherwise we push space for a return address that the safepoint
2815   // handler will install later to make the stack walking sensible.
2816   if (!cause_return) {
2817     __ sub(sp, sp, wordSize); // make room for return address
2818   }
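       // When !cause_return the word reserved above stands in for the frame's
       // return address; it is filled in below from the thread's
       // saved_exception_pc.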
2819   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, !cause_return);
2820 
2821   // The following is basically a call_VM.  However, we need the precise
2822   // address of the call in order to generate an oopmap. Hence, we do all the
2823   // work ourselves.
2824 
2825   Label retaddr;
2826   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2827 
2828   // The return address must always be correct so that the frame
2829   // constructor never sees an invalid pc.
2830 
2831   if (!cause_return) {
2832     // overwrite the return address pushed by save_live_registers
2833     // Additionally, r5 is a callee-saved register so we can look at
2834     // it later to determine if someone changed the return address for
2835     // us!
2836     __ ldr(r5, Address(rthread, JavaThread::saved_exception_pc_offset()));
2837     __ str(r5, Address(rfp, frame::get_return_addr_offset() * wordSize));
2838   }
2839 
2840   // Do the call
2841   __ mov(c_rarg0, rthread);
2842   __ lea(rscratch1, RuntimeAddress(call_ptr));
2843   __ bl(rscratch1);
2844   __ bind(retaddr);
2845 
2846   // Set an oopmap for the call site.  This oopmap will map all
2847   // oop-registers and debug-info registers as callee-saved.  This
2848   // will allow deoptimization at this safepoint to find all possible
2849   // debug-info recordings, as well as let GC find all oops.
2850 
2851   oop_maps->add_gc_map( __ pc() - start, map);
2852 
2853   Label noException, no_adjust, bail;
2854 
2855   __ reset_last_Java_frame(false);
2856 
2857   __ maybe_isb();
2858   __ membar(Assembler::LoadLoad | Assembler::LoadStore);
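       // LoadLoad|LoadStore is an acquire-style barrier: the loads and stores
       // below cannot be reordered ahead of loads that precede it.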
2859 
2860   if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
2861     __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2862     __ cbnz(rscratch1, no_adjust);
2863 
2864     // If our stashed return pc was modified by the runtime we avoid touching it
2865     __ ldr(rscratch1, Address(rfp, frame::get_return_addr_offset() * wordSize));
2866     __ cmp(r5, rscratch1);
2867     __ b(no_adjust, Assembler::NE);
2868 
2869 #ifdef ASSERT
2870     // Verify the correct encoding of the poll we're about to skip.
2871     // ldr(r12, [r12, #0]);
2872     __ ldr(rscratch1, Address(r5));
2873     __ bic(rscratch1, rscratch1, ~0xfff0ffff);
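         // ~0xfff0ffff == 0x000f0000, i.e. bits 19:16 (the base-register field
         // of the ldr encoding) are cleared, so any base register used by the
         // poll matches the constant compared against below.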
2874     __ mov(rscratch2, 0xe590c000);
2875     __ cmp(rscratch1, rscratch2);
2876     __ b(bail, Assembler::NE);
2877 #endif
2878     // Adjust return pc forward to step over the safepoint poll instruction
2879     __ add(r5, r5, NativeInstruction::arm_insn_sz);
2880     __ str(r5, Address(rfp, frame::get_return_addr_offset() * wordSize));
2881   }
2882 
2883   __ bind(no_adjust);
2884 
2885   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2886   __ cmp(rscratch1, 0);
2887 
2888   // does not kill flags
2889   RegisterSaver::restore_live_registers(masm, cause_return);
2890   // for !POLL_AT_RETURN the return address is still on the stack
2891 
2892   __ b(noException, Assembler::EQ);
2893 
2894   // Exception pending
2895   if (cause_return)
2896     __ mov(r3, lr);
2897   else
2898     __ pop(r3);
2899   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2900 
2901   // No exception case
2902   __ bind(noException);
2903 
2904   if (cause_return)
2905     __ b(lr);
2906   else
2907     __ pop(r15_pc);
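       // In the !cause_return case the pc popped here is the slot reserved on
       // entry, possibly advanced past the poll instruction by the code above.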
2908 
2909 #ifdef ASSERT
2910   __ bind(bail);
2911   __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
2912 #endif
2913 
2914   // Make sure all code is generated
2915   masm->flush();
2916 
2917   // Fill-out other meta info
2918   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
2919 }
2920 
2921 //
2922 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
2923 //
2924 // Generate a stub that calls into vm to find out the proper destination
2925 // of a java call. All the argument registers are live at this point
2926 // but since this is generic code we don't know what they are and the caller
2927 // must do any gc of the args.
2928 //
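     // The destination is one of the call-resolution entry points in the
     // shared runtime (e.g. SharedRuntime::resolve_static_call_C or
     // SharedRuntime::resolve_virtual_call_C); on success the target Method*
     // comes back via the thread's vm_result_2 and the code entry to jump to
     // in r0 (see the end of this stub).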
2929 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2930   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2931 
2932   // allocate space for the code
2933   ResourceMark rm;
2934 
2935   //CodeBuffer buffer(name, 1000, 512);
2936   CodeBuffer buffer(name, 2048, 512); // enlarged from 1000; the smaller buffer caused an error later
2937   MacroAssembler* masm                = new MacroAssembler(&buffer);
2938 
2939   int frame_size_in_words;
2940 
2941   OopMapSet *oop_maps = new OopMapSet();
2942   OopMap* map = NULL;
2943 
2944   int start = __ offset();
2945 
2946   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2947 
2948   int frame_complete = __ offset();
2949 
2950   {
2951     Label retaddr;
2952     __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2953 
2954     __ mov(c_rarg0, rthread);
2955     __ lea(rscratch1, RuntimeAddress(destination));
2956 
2957     __ bl(rscratch1);
2958     __ bind(retaddr);
2959   }
2960 
2961   // Set an oopmap for the call site.
2962   // We need this not only for callee-saved registers, but also for volatile
2963   // registers that the compiler might be keeping live across a safepoint.
2964 
2965   oop_maps->add_gc_map( __ offset() - start, map);
2966 
2967   __ maybe_isb();
2968 
2969   // r0 contains the address we are going to jump to assuming no exception got installed
2970 
2971   // clear last_Java_sp
2972   __ reset_last_Java_frame(false);
2973   // check for pending exceptions
2974   Label pending;
2975   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2976   __ cbnz(rscratch1, pending);
2977 
2978   // get the returned Method*
2979   __ get_vm_result_2(rmethod, rthread);
2980   __ str(rmethod, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::rmethod_off)));
2981 
2982   // r0 is where we want to jump, overwrite rscratch1 which is saved and scratch
2983   __ str(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::rscratch1_off)));
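       // Spilling r0 into rscratch1's save slot means restore_live_registers
       // reloads the destination into rscratch1 while leaving every argument
       // register exactly as it was on entry.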
2984   RegisterSaver::restore_live_registers(masm);
2985 
2986   // We are back to the original state on entry and ready to go.
2987 
2988   __ b(rscratch1);
2989 
2990   // Pending exception after the safepoint
2991 
2992   __ bind(pending);
2993 
2994   RegisterSaver::restore_live_registers(masm);
2995 
2996   // exception pending => remove activation and forward to exception handler
2997   __ mov(rscratch1, 0);
2998   __ str(rscratch1, Address(rthread, JavaThread::vm_result_offset()));
2999 
3000   __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
3001   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3002 
3003   // -------------
3004   // make sure all code is generated
3005   masm->flush();
3006 
3007   // return the  blob
3008   // frame_size_words or bytes??
3009   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3010 }
3011 
3012 
3013 #if COMPILER2_OR_JVMCI
3014 // This is here instead of runtime_aarch32.cpp because it uses SimpleRuntimeFrame
3015 //
3016 //------------------------------generate_exception_blob---------------------------
3017 // creates the exception blob at the end
3018 // A compiled method jumps to this code from its exception handler stub
3019 // (see emit_exception_handler in the aarch32.ad file).
3020 //
3021 // Given an exception pc at a call, we call into the runtime for the
3022 // handler in this method. This handler might merely restore state
3023 // (i.e. callee-saved registers), unwind the frame, and jump to the
3024 // exception handler for the nmethod if there is no Java-level handler
3025 // for the nmethod.
3026 //
3027 // This code is entered with a jump (not a call).
3028 //
3029 // Arguments:
3030 //   r0: exception oop
3031 //   r3: exception pc
3032 //
3033 // Results:
3034 //   r0: exception oop
3035 //   r3: exception pc in caller or ???
3036 //   destination: exception handler of caller
3037 //
3038 // Note: the exception pc MUST be at a call (precise debug information)
3039 //       Registers r0, r3, r2, r4, r5, r8-r11 are not callee-saved.
3040 //
3041 
3042 void OptoRuntime::generate_exception_blob() {
3043   // allocate space for code
3044   ResourceMark rm;
3045   int pad = VerifyThread ? 256 : 0;// Extra slop space for more verify code
3046 
3047   // setup code generation tools
3048   // Measured 8/7/03 at 256 in 32bit debug build (no VerifyThread)
3049   // Measured 8/7/03 at 528 in 32bit debug build (VerifyThread)
3050   CodeBuffer buffer("exception_blob", 600+pad, 512);
3051   MacroAssembler* masm     = new MacroAssembler(&buffer);
3052 
3053   int framesize_in_words = 2; // FP + LR
3054   int framesize_in_bytes = framesize_in_words * wordSize;
3055   int framesize_in_slots = framesize_in_bytes / sizeof(jint);
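       // Only FP and LR are saved by this blob; the oopmap added below is
       // empty because every location is dead while unwinding (see the
       // comment ahead of add_gc_map).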
3056 
3057   address start = __ pc();
3058 
3059   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
3060   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
3061 
3062   // This call does all the hard work. It checks if an exception catch
3063   // exists in the method.
3064   // If so, it returns the handler address.
3065   // If the nmethod has been deoptimized and it had a handler the handler
3066   // address is the deopt blob unpack_with_exception entry.
3067   //
3068   // If no handler exists it prepares for stack-unwinding, restoring the callee-save
3069   // registers of the frame being removed.
3070   //
3071   __ mov(lr, r3);
3072   __ enter();
3073   address the_pc = __ pc();
3074   __ set_last_Java_frame(sp, noreg, the_pc, rscratch1);
3075 
3076   __ mov(r0, rthread);
3077 
3078   // This call can block at exit and nmethod can be deoptimized at that
3079   // point. If the nmethod had a catch point we would jump to the
3080 // now-deoptimized catch point and fall through the vanilla deopt
3081 // path and lose the exception.
3082   // Sure would be simpler if this call didn't block!
3083   __ call(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C));
3084 
3085   // Set an oopmap for the call site.  This oopmap will only be used if we
3086   // are unwinding the stack.  Hence, all locations will be dead.
3087   // Callee-saved registers will be the same as the frame above (i.e.,
3088   // handle_exception_stub), since they were restored when we got the
3089   // exception.
3090 
3091   OopMapSet* oop_maps = new OopMapSet();
3092 
3093   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3094 
3095   __ reset_last_Java_frame(false);
3096 
3097   __ leave();
3098 
3099   // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site.
3100   __ ldr(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
3101   __ cmp(rscratch1, 0);
3102   __ mov(sp, rfp, Assembler::NE);
3103 
3104   // We have a handler in r0 (could be deopt blob).
3105   __ mov(rscratch2, r0);
3106 
3107   // Since this may be the deopt blob we must set r3 to look like we returned
3108   // from the original pc that threw the exception
3109 
3110   __ ldr(r3,  Address(rthread, JavaThread::exception_pc_offset()));
3111 
3112   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
3113   __ mov(rscratch1, 0);
3114 #ifdef ASSERT
3115   __ str(rscratch1, Address(rthread, JavaThread::exception_handler_pc_offset()));
3116   __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset()));
3117 #endif
3118   // Clear the exception oop so GC no longer processes it as a root.
3119   __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset()));
3120   __ b(rscratch2);
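       // Tail-branch to the handler (or deopt blob) kept in rscratch2, with
       // r0 = exception oop and r3 = original throwing pc, as that code expects.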
3121 
3122   // -------------
3123   // make sure all code is generated
3124   masm->flush();
3125 
3126   _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize_in_words);
3127 }
3128 #endif // COMPILER2_OR_JVMCI