1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "runtime/vframeArray.hpp"
  37 #include "utilities/align.hpp"
  38 #include "vmreg_arm.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #endif
  45 
  46 #define __ masm->
  47 
  48 class RegisterSaver {
  49 public:
  50 
  51   // Special registers:
  52   //              32-bit ARM     64-bit ARM
  53   //  Rthread:       R10            R28
  54   //  LR:            R14            R30
  55 
  56   // Rthread is callee saved in the C ABI and never changed by compiled code:
  57   // no need to save it.
  58 
  59   // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  60   // The one at LR_offset is a return address that is needed by stack walking.
  61   // A c2 method uses LR as a standard register so it may be live when we
  62   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  63   // in case it's live in the method we are coming from.
  64 
  65 
  66   enum RegisterLayout {
  67     fpu_save_size = FloatRegisterImpl::number_of_registers,
  68 #ifndef __SOFTFP__
  69     D0_offset = 0,
  70 #endif
  71     R0_offset = fpu_save_size,
  72     R1_offset,
  73     R2_offset,
  74     R3_offset,
  75     R4_offset,
  76     R5_offset,
  77     R6_offset,
  78 #if (FP_REG_NUM != 7)
  79     // if not saved as FP
  80     R7_offset,
  81 #endif
  82     R8_offset,
  83     R9_offset,
  84 #if (FP_REG_NUM != 11)
  85     // if not saved as FP
  86     R11_offset,
  87 #endif
  88     R12_offset,
  89     R14_offset,
  90     FP_offset,
  91     LR_offset,
  92     reg_save_size,
  93 
  94     Rmethod_offset = R9_offset,
  95     Rtemp_offset = R12_offset,
  96   };
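
       // Illustrative sketch (added, not part of the original layout comment) of the
       // resulting save area, as word offsets from SP once save_live_registers()
       // has pushed everything (lowest offsets first):
       //   [0 .. fpu_save_size-1]     D0..D15 (and D16..D31 with VFPv3-32),
       //                              or just reserved space without VFP
       //   [R0_offset .. R14_offset]  R0..R6, R8, R9, altFP_7_11, R12, R14
       //   [FP_offset]                saved FP
       //   [LR_offset]                return address used by stack walking
       // reg_save_size is the total size of this save area in words.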
  97 
  98   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  99   // (altFP_7_11 is the one among R7 and R11 that is not FP)
 100 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 101 
 102 
 103   //  When LR may be live in the nmethod from which we are coming,
 104   //  lr_saved is true: the caller saves the return address before the
 105   //  call to save_live_registers, and LR still contains the
 106   //  live value.
 107 
 108   static OopMap* save_live_registers(MacroAssembler* masm,
 109                                      int* total_frame_words,
 110                                      bool lr_saved = false);
 111   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 112 
 113 };
 114 
 115 
 116 
 117 
 118 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 119                                            int* total_frame_words,
 120                                            bool lr_saved) {
 121   *total_frame_words = reg_save_size;
 122 
 123   OopMapSet *oop_maps = new OopMapSet();
 124   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 125 
 126   if (lr_saved) {
 127     __ push(RegisterSet(FP));
 128   } else {
 129     __ push(RegisterSet(FP) | RegisterSet(LR));
 130   }
 131   __ push(SAVED_BASE_REGS);
 132   if (HaveVFP) {
 133     if (VM_Version::has_vfp3_32()) {
 134       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 135     } else {
 136       if (FloatRegisterImpl::number_of_registers > 32) {
 137         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 138         __ sub(SP, SP, 32 * wordSize);
 139       }
 140     }
 141     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 142   } else {
 143     __ sub(SP, SP, fpu_save_size * wordSize);
 144   }
 145 
 146   int i;
 147   int j=0;
 148   for (i = R0_offset; i <= R9_offset; i++) {
 149     if (j == FP_REG_NUM) {
 150       // skip the FP register, managed below.
 151       j++;
 152     }
 153     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 154     j++;
 155   }
 156   assert(j == R10->encoding(), "must be");
 157 #if (FP_REG_NUM != 11)
 158   // add R11, if not managed as FP
 159   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 160 #endif
 161   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 162   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 163   if (HaveVFP) {
 164     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 165       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 166       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 167     }
 168   }
 169 
 170   return map;
 171 }
 172 
 173 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 174   if (HaveVFP) {
 175     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 176     if (VM_Version::has_vfp3_32()) {
 177       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 178     } else {
 179       if (FloatRegisterImpl::number_of_registers > 32) {
 180         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 181         __ add(SP, SP, 32 * wordSize);
 182       }
 183     }
 184   } else {
 185     __ add(SP, SP, fpu_save_size * wordSize);
 186   }
 187   __ pop(SAVED_BASE_REGS);
 188   if (restore_lr) {
 189     __ pop(RegisterSet(FP) | RegisterSet(LR));
 190   } else {
 191     __ pop(RegisterSet(FP));
 192   }
 193 }
 194 
 195 
 196 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 197 #ifdef __ABI_HARD__
 198   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 199     __ sub(SP, SP, 8);
 200     __ fstd(D0, Address(SP));
 201     return;
 202   }
 203 #endif // __ABI_HARD__
 204   __ raw_push(R0, R1);
 205 }
 206 
 207 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 208 #ifdef __ABI_HARD__
 209   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 210     __ fldd(D0, Address(SP));
 211     __ add(SP, SP, 8);
 212     return;
 213   }
 214 #endif // __ABI_HARD__
 215   __ raw_pop(R0, R1);
 216 }
 217 
 218 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 219   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 220   __ push(RegisterSet(R0, R3));
 221 
 222 #ifdef __ABI_HARD__
 223   // preserve arguments
 224   // Probably not needed, as the locking code is unlikely to modify the volatile FP registers,
 225   // but there is no way to guarantee that.
 226   if (fp_regs_in_arguments) {
 227     // convert fp_regs_in_arguments to a number of double registers
 228     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 229     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 230   }
 231 #endif // __ABI_HARD__
 232 }
 233 
 234 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 235 #ifdef __ABI_HARD__
 236   if (fp_regs_in_arguments) {
 237     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 238     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 239   }
 240 #endif // __ABI_HARD__
 241 
 242   __ pop(RegisterSet(R0, R3));
 243 }
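
     // Added note: fp_regs_in_arguments counts single-precision (S) argument
     // slots; (fp_regs_in_arguments + 1) >> 1 rounds that up to whole D
     // registers because fstmdbd/fldmiad operate on double registers, so e.g.
     // 3 S slots are preserved by saving and restoring D0 and D1.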
 244 
 245 
 246 
 247 // Is the vector's size (in bytes) bigger than the size saved by default?
 248 // All vector registers are saved by default on ARM.
 249 bool SharedRuntime::is_wide_vector(int size) {
 250   return false;
 251 }
 252 
 253 size_t SharedRuntime::trampoline_size() {
 254   return 16;
 255 }
 256 
 257 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 258   InlinedAddress dest(destination);
 259   __ indirect_jump(dest, Rtemp);
 260   __ bind_literal(dest);
 261 }
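
     // Added note: the trampoline is an indirect jump through an inlined literal
     // holding 'destination'; the emitted sequence (the jump plus the bound
     // literal word) is assumed to fit within the 16 bytes reported by
     // trampoline_size() above.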
 262 
 263 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 264                                         VMRegPair *regs,
 265                                         VMRegPair *regs2,
 266                                         int total_args_passed) {
 267   assert(regs2 == NULL, "not needed on arm");
 268 
 269   int slot = 0;
 270   int ireg = 0;
 271 #ifdef __ABI_HARD__
 272   int fp_slot = 0;
 273   int single_fpr_slot = 0;
 274 #endif // __ABI_HARD__
 275   for (int i = 0; i < total_args_passed; i++) {
 276     switch (sig_bt[i]) {
 277     case T_SHORT:
 278     case T_CHAR:
 279     case T_BYTE:
 280     case T_BOOLEAN:
 281     case T_INT:
 282     case T_ARRAY:
 283     case T_OBJECT:
 284     case T_ADDRESS:
 285     case T_METADATA:
 286 #ifndef __ABI_HARD__
 287     case T_FLOAT:
 288 #endif // !__ABI_HARD__
 289       if (ireg < 4) {
 290         Register r = as_Register(ireg);
 291         regs[i].set1(r->as_VMReg());
 292         ireg++;
 293       } else {
 294         regs[i].set1(VMRegImpl::stack2reg(slot));
 295         slot++;
 296       }
 297       break;
 298     case T_LONG:
 299 #ifndef __ABI_HARD__
 300     case T_DOUBLE:
 301 #endif // !__ABI_HARD__
 302       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing half");
 303       if (ireg <= 2) {
 304 #if (ALIGN_WIDE_ARGUMENTS == 1)
 305         if(ireg & 1) ireg++;  // Aligned location required
 306 #endif
 307         Register r1 = as_Register(ireg);
 308         Register r2 = as_Register(ireg + 1);
 309         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 310         ireg += 2;
 311 #if (ALIGN_WIDE_ARGUMENTS == 0)
 312       } else if (ireg == 3) {
 313         // uses R3 + one stack slot
 314         Register r = as_Register(ireg);
 315         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 316         ireg += 1;
 317         slot += 1;
 318 #endif
 319       } else {
 320         if (slot & 1) slot++; // Aligned location required
 321         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 322         slot += 2;
 323         ireg = 4;
 324       }
 325       break;
 326     case T_VOID:
 327       regs[i].set_bad();
 328       break;
 329 #ifdef __ABI_HARD__
 330     case T_FLOAT:
 331       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 332         if ((single_fpr_slot & 1) == 0) {
 333           single_fpr_slot = fp_slot;
 334           fp_slot += 2;
 335         }
 336         FloatRegister r = as_FloatRegister(single_fpr_slot);
 337         single_fpr_slot++;
 338         regs[i].set1(r->as_VMReg());
 339       } else {
 340         regs[i].set1(VMRegImpl::stack2reg(slot));
 341         slot++;
 342       }
 343       break;
 344     case T_DOUBLE:
 345       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 346       if (fp_slot <= 14) {
 347         FloatRegister r1 = as_FloatRegister(fp_slot);
 348         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 349         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 350         fp_slot += 2;
 351       } else {
 352         if(slot & 1) slot++;
 353         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 354         slot += 2;
 355         single_fpr_slot = 16;
 356       }
 357       break;
 358 #endif // __ABI_HARD__
 359     default:
 360       ShouldNotReachHere();
 361     }
 362   }
 363   return slot;
 364 }
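
     // Added worked example (illustrative, assuming __ABI_HARD__ and
     // ALIGN_WIDE_ARGUMENTS == 1): for a signature (T_INT, T_LONG, T_FLOAT,
     // T_DOUBLE, T_INT) the loop above assigns
     //   T_INT    -> R0
     //   T_LONG   -> R2:R3      (ireg is first bumped to the even register)
     //   T_FLOAT  -> S0         (fp_slot advances by 2; S1 is kept for back-filling)
     //   T_DOUBLE -> D1 (S2:S3)
     //   T_INT    -> stack slot 0
     // and returns 1, the number of stack slots used. The T_VOID halves that
     // follow T_LONG/T_DOUBLE are set_bad(), and a later T_FLOAT would back-fill
     // S1 via single_fpr_slot before further D registers are consumed.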
 365 
 366 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 367                                            VMRegPair *regs,
 368                                            int total_args_passed,
 369                                            int is_outgoing) {
 370 #ifdef __SOFTFP__
 371   // soft float is the same as the C calling convention.
 372   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 373 #endif // __SOFTFP__
 374   (void) is_outgoing;
 375   int slot = 0;
 376   int ireg = 0;
 377   int freg = 0;
 378   int single_fpr = 0;
 379 
 380   for (int i = 0; i < total_args_passed; i++) {
 381     switch (sig_bt[i]) {
 382     case T_SHORT:
 383     case T_CHAR:
 384     case T_BYTE:
 385     case T_BOOLEAN:
 386     case T_INT:
 387     case T_ARRAY:
 388     case T_OBJECT:
 389     case T_ADDRESS:
 390       if (ireg < 4) {
 391         Register r = as_Register(ireg++);
 392         regs[i].set1(r->as_VMReg());
 393       } else {
 394         regs[i].set1(VMRegImpl::stack2reg(slot++));
 395       }
 396       break;
 397     case T_FLOAT:
 398       // C2 utilizes S14/S15 for mem-mem moves
 399       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 400         if ((single_fpr & 1) == 0) {
 401           single_fpr = freg;
 402           freg += 2;
 403         }
 404         FloatRegister r = as_FloatRegister(single_fpr++);
 405         regs[i].set1(r->as_VMReg());
 406       } else {
 407         regs[i].set1(VMRegImpl::stack2reg(slot++));
 408       }
 409       break;
 410     case T_DOUBLE:
 411       // C2 utilizes S14/S15 for mem-mem moves
 412       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 413         FloatRegister r1 = as_FloatRegister(freg);
 414         FloatRegister r2 = as_FloatRegister(freg + 1);
 415         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 416         freg += 2;
 417       } else {
 418         // Keep internally the aligned calling convention,
 419         // ignoring ALIGN_WIDE_ARGUMENTS
 420         if (slot & 1) slot++;
 421         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 422         slot += 2;
 423         single_fpr = 16;
 424       }
 425       break;
 426     case T_LONG:
 427       // Keep internally the aligned calling convention,
 428       // ignoring ALIGN_WIDE_ARGUMENTS
 429       if (ireg <= 2) {
 430         if (ireg & 1) ireg++;
 431         Register r1 = as_Register(ireg);
 432         Register r2 = as_Register(ireg + 1);
 433         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 434         ireg += 2;
 435       } else {
 436         if (slot & 1) slot++;
 437         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 438         slot += 2;
 439         ireg = 4;
 440       }
 441       break;
 442     case T_VOID:
 443       regs[i].set_bad();
 444       break;
 445     default:
 446       ShouldNotReachHere();
 447     }
 448   }
 449 
 450   if (slot & 1) slot++;
 451   return slot;
 452 }
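
     // Added summary: on a hard-float build this mirrors the C convention above,
     // except that with COMPILER2 the last single-precision pair (S14/S15, i.e.
     // D7) is kept free for mem-mem moves, so floats use at most S0..S13 and
     // doubles at most D0..D6; longs always take an aligned register pair or
     // aligned stack slots regardless of ALIGN_WIDE_ARGUMENTS; and the returned
     // stack-slot count is rounded up to an even number.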
 453 
 454 static void patch_callers_callsite(MacroAssembler *masm) {
 455   Label skip;
 456 
 457   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 458   __ cbz(Rtemp, skip);
 459 
 460   // Pushing an even number of registers for stack alignment.
 461   // Selecting R9, which had to be saved anyway for some platforms.
 462   __ push(RegisterSet(R0, R3) | R9 | LR);
 463 
 464   __ mov(R0, Rmethod);
 465   __ mov(R1, LR);
 466   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 467 
 468   __ pop(RegisterSet(R0, R3) | R9 | LR);
 469 
 470   __ bind(skip);
 471 }
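
     // Added note: if the method already has compiled code (Method::code() is
     // non-NULL), SharedRuntime::fixup_callers_callsite() is called with the
     // method and the caller's return address so the caller's call site can be
     // re-patched to call the compiled entry directly on future calls.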
 472 
 473 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 474                                     int total_args_passed, int comp_args_on_stack,
 475                                     const BasicType *sig_bt, const VMRegPair *regs) {
 476   // TODO: ARM - Maybe we can use ldm to load the arguments
 477   const Register tmp = Rtemp; // avoid erasing R5_mh
 478 
 479   // The next assert may not be needed, but it is safer. Extra analysis is required
 480   // if there are not enough free registers and we need to use R5 here.
 481   assert_different_registers(tmp, R5_mh);
 482 
 483   // 6243940 We might end up in handle_wrong_method if
 484   // the callee is deoptimized as we race through here. If that
 485   // happens we don't want to take a safepoint because the
 486   // caller frame will look interpreted and arguments are now
 487   // "compiled", so it is much better to make this transition
 488   // invisible to the stack walking code. Unfortunately, if
 489   // we try to find the callee by normal means a safepoint
 490   // is possible. So we stash the desired callee in the thread
 491   // and the VM will find it there should this case occur.
 492   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 493   __ str(Rmethod, callee_target_addr);
 494 
 495 
 496   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 497 
 498   const Register initial_sp = Rmethod; // temporarily scratched
 499 
 500   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 501   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 502 
 503   __ mov(initial_sp, SP);
 504 
 505   if (comp_args_on_stack) {
 506     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 507   }
 508   __ bic(SP, SP, StackAlignmentInBytes - 1);
 509 
 510   for (int i = 0; i < total_args_passed; i++) {
 511     if (sig_bt[i] == T_VOID) {
 512       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 513       continue;
 514     }
 515     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 516     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 517 
 518     VMReg r_1 = regs[i].first();
 519     VMReg r_2 = regs[i].second();
 520     if (r_1->is_stack()) {
 521       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 522       if (!r_2->is_valid()) {
 523         __ ldr(tmp, Address(initial_sp, arg_offset));
 524         __ str(tmp, Address(SP, stack_offset));
 525       } else {
 526         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 527         __ str(tmp, Address(SP, stack_offset));
 528         __ ldr(tmp, Address(initial_sp, arg_offset));
 529         __ str(tmp, Address(SP, stack_offset + wordSize));
 530       }
 531     } else if (r_1->is_Register()) {
 532       if (!r_2->is_valid()) {
 533         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 534       } else {
 535         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 536         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 537       }
 538     } else if (r_1->is_FloatRegister()) {
 539 #ifdef __SOFTFP__
 540       ShouldNotReachHere();
 541 #endif // __SOFTFP__
 542       if (!r_2->is_valid()) {
 543         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 544       } else {
 545         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 546       }
 547     } else {
 548       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 549     }
 550   }
 551 
 552   // restore Rmethod (scratched for initial_sp)
 553   __ ldr(Rmethod, callee_target_addr);
 554   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 555 
 556 }
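
     // Added note: the loop above copies each interpreter argument (one
     // Interpreter::stackElementSize slot, addressed via expr_offset_in_bytes()
     // from initial_sp) into the register or outgoing stack slot demanded by the
     // compiled calling convention; two-word values read their second half one
     // stackElementSize lower. Rmethod is then reloaded from the thread-local
     // callee_target slot and control jumps to the method's from_compiled entry.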
 557 
 558 static void gen_c2i_adapter(MacroAssembler *masm,
 559                             int total_args_passed,  int comp_args_on_stack,
 560                             const BasicType *sig_bt, const VMRegPair *regs,
 561                             Label& skip_fixup) {
 562   // TODO: ARM - Maybe we can use stm to deoptimize the arguments
 563   const Register tmp = Rtemp;
 564 
 565   patch_callers_callsite(masm);
 566   __ bind(skip_fixup);
 567 
 568   __ mov(Rsender_sp, SP); // not yet saved
 569 
 570 
 571   int extraspace = total_args_passed * Interpreter::stackElementSize;
 572   if (extraspace) {
 573     __ sub_slow(SP, SP, extraspace);
 574   }
 575 
 576   for (int i = 0; i < total_args_passed; i++) {
 577     if (sig_bt[i] == T_VOID) {
 578       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 579       continue;
 580     }
 581     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 582 
 583     VMReg r_1 = regs[i].first();
 584     VMReg r_2 = regs[i].second();
 585     if (r_1->is_stack()) {
 586       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 587       if (!r_2->is_valid()) {
 588         __ ldr(tmp, Address(SP, arg_offset));
 589         __ str(tmp, Address(SP, stack_offset));
 590       } else {
 591         __ ldr(tmp, Address(SP, arg_offset));
 592         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 593         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 594         __ str(tmp, Address(SP, stack_offset));
 595       }
 596     } else if (r_1->is_Register()) {
 597       if (!r_2->is_valid()) {
 598         __ str(r_1->as_Register(), Address(SP, stack_offset));
 599       } else {
 600         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 601         __ str(r_2->as_Register(), Address(SP, stack_offset));
 602       }
 603     } else if (r_1->is_FloatRegister()) {
 604 #ifdef __SOFTFP__
 605       ShouldNotReachHere();
 606 #endif // __SOFTFP__
 607       if (!r_2->is_valid()) {
 608         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 609       } else {
 610         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 611       }
 612     } else {
 613       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 614     }
 615   }
 616 
 617   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 618 
 619 }
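
     // Added note: this is the reverse shuffle of gen_i2c_adapter(): after
     // optionally patching the caller's call site, each compiled-convention
     // argument is stored into the interpreter expression-stack layout reserved
     // just below SP, and control jumps to the method's interpreter entry.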
 620 
 621 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 622                                                             int total_args_passed,
 623                                                             int comp_args_on_stack,
 624                                                             const BasicType *sig_bt,
 625                                                             const VMRegPair *regs,
 626                                                             AdapterFingerPrint* fingerprint) {
 627   address i2c_entry = __ pc();
 628   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 629 
 630   address c2i_unverified_entry = __ pc();
 631   Label skip_fixup;
 632   const Register receiver       = R0;
 633   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 634   const Register receiver_klass = R4;
 635 
 636   __ load_klass(receiver_klass, receiver);
 637   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 638   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 639   __ cmp(receiver_klass, holder_klass);
 640 
 641   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 642   __ cmp(Rtemp, 0, eq);
 643   __ b(skip_fixup, eq);
 644   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
 645 
 646   address c2i_entry = __ pc();
 647   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 648 
 649   __ flush();
 650   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 651 }
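
     // Added note: c2i_unverified_entry open-codes the inline cache check: the
     // receiver's klass is compared with the klass cached in the
     // CompiledICHolder, Rmethod is loaded from the holder, and only a hit with
     // no compiled code installed branches to skip_fixup inside the c2i adapter;
     // every other outcome goes through the ic-miss stub to be re-resolved.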
 652 
 653 
 654 static int reg2offset_in(VMReg r) {
 655   // Account for saved FP and LR
 656   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
 657 }
 658 
 659 static int reg2offset_out(VMReg r) {
 660   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 661 }
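
     // Added note: reg2offset_in() turns an incoming stack VMReg into an
     // FP-relative byte offset in the caller's frame (the extra 2*wordSize skips
     // the FP and LR pushed by this wrapper), while reg2offset_out() turns an
     // outgoing stack VMReg into an SP-relative byte offset in the current
     // frame's out-argument area.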
 662 
 663 
 664 static void verify_oop_args(MacroAssembler* masm,
 665                             const methodHandle& method,
 666                             const BasicType* sig_bt,
 667                             const VMRegPair* regs) {
 668   Register temp_reg = Rmethod;  // not part of any compiled calling seq
 669   if (VerifyOops) {
 670     for (int i = 0; i < method->size_of_parameters(); i++) {
 671       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 672         VMReg r = regs[i].first();
 673         assert(r->is_valid(), "bad oop arg");
 674         if (r->is_stack()) {
 675           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 676           __ verify_oop(temp_reg);
 677         } else {
 678           __ verify_oop(r->as_Register());
 679         }
 680       }
 681     }
 682   }
 683 }
 684 
 685 static void gen_special_dispatch(MacroAssembler* masm,
 686                                  const methodHandle& method,
 687                                  const BasicType* sig_bt,
 688                                  const VMRegPair* regs) {
 689   verify_oop_args(masm, method, sig_bt, regs);
 690   vmIntrinsics::ID iid = method->intrinsic_id();
 691 
 692   // Now write the args into the outgoing interpreter space
 693   bool     has_receiver   = false;
 694   Register receiver_reg   = noreg;
 695   int      member_arg_pos = -1;
 696   Register member_reg     = noreg;
 697   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
 698   if (ref_kind != 0) {
 699     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
 700     member_reg = Rmethod;  // known to be free at this point
 701     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 702   } else if (iid == vmIntrinsics::_invokeBasic) {
 703     has_receiver = true;
 704   } else {
 705     fatal("unexpected intrinsic id %d", iid);
 706   }
 707 
 708   if (member_reg != noreg) {
 709     // Load the member_arg into register, if necessary.
 710     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
 711     VMReg r = regs[member_arg_pos].first();
 712     if (r->is_stack()) {
 713       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 714     } else {
 715       // no data motion is needed
 716       member_reg = r->as_Register();
 717     }
 718   }
 719 
 720   if (has_receiver) {
 721     // Make sure the receiver is loaded into a register.
 722     assert(method->size_of_parameters() > 0, "oob");
 723     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 724     VMReg r = regs[0].first();
 725     assert(r->is_valid(), "bad receiver arg");
 726     if (r->is_stack()) {
 727       // Porting note:  This assumes that compiled calling conventions always
 728       // pass the receiver oop in a register.  If this is not true on some
 729       // platform, pick a temp and load the receiver from stack.
 730       assert(false, "receiver always in a register");
 731       receiver_reg = j_rarg0;  // known to be free at this point
 732       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 733     } else {
 734       // no data motion is needed
 735       receiver_reg = r->as_Register();
 736     }
 737   }
 738 
 739   // Figure out which address we are really jumping to:
 740   MethodHandles::generate_method_handle_dispatch(masm, iid,
 741                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
 742 }
 743 
 744 // ---------------------------------------------------------------------------
 745 // Generate a native wrapper for a given method.  The method takes arguments
 746 // in the Java compiled code convention, marshals them to the native
 747 // convention (handlizes oops, etc.), transitions to native, makes the call,
 748 // returns to Java state (possibly blocking), unhandlizes any result and
 749 // returns.
 750 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 751                                                 const methodHandle& method,
 752                                                 int compile_id,
 753                                                 BasicType* in_sig_bt,
 754                                                 VMRegPair* in_regs,
 755                                                 BasicType ret_type,
 756                                                 address critical_entry) {
 757   if (method->is_method_handle_intrinsic()) {
 758     vmIntrinsics::ID iid = method->intrinsic_id();
 759     intptr_t start = (intptr_t)__ pc();
 760     int vep_offset = ((intptr_t)__ pc()) - start;
 761     gen_special_dispatch(masm,
 762                          method,
 763                          in_sig_bt,
 764                          in_regs);
 765     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
 766     __ flush();
 767     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
 768     return nmethod::new_native_nmethod(method,
 769                                        compile_id,
 770                                        masm->code(),
 771                                        vep_offset,
 772                                        frame_complete,
 773                                        stack_slots / VMRegImpl::slots_per_word,
 774                                        in_ByteSize(-1),
 775                                        in_ByteSize(-1),
 776                                        (OopMapSet*)NULL);
 777   }
 778   // Arguments for JNI method include JNIEnv and Class if static
 779 
 780   // Usage of Rtemp should be OK since it is scratched by the native call
 781 
 782   bool is_static = method->is_static();
 783 
 784   const int total_in_args = method->size_of_parameters();
 785   int total_c_args = total_in_args + 1;
 786   if (is_static) {
 787     total_c_args++;
 788   }
 789 
 790   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
 791   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
 792 
 793   int argc = 0;
 794   out_sig_bt[argc++] = T_ADDRESS;
 795   if (is_static) {
 796     out_sig_bt[argc++] = T_OBJECT;
 797   }
 798 
 799   int i;
 800   for (i = 0; i < total_in_args; i++) {
 801     out_sig_bt[argc++] = in_sig_bt[i];
 802   }
 803 
 804   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
 805   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 806   // Since object arguments need to be wrapped, we must preserve space
 807   // for those object arguments that come in registers (GPR_PARAMS maximum)
 808   // plus one more slot for the Klass handle (for static methods).
 809   int oop_handle_offset = stack_slots;
 810   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
 811 
 812   // Plus a lock if needed
 813   int lock_slot_offset = 0;
 814   if (method->is_synchronized()) {
 815     lock_slot_offset = stack_slots;
 816     assert(sizeof(BasicLock) == wordSize, "adjust this code");
 817     stack_slots += VMRegImpl::slots_per_word;
 818   }
 819 
 820   // Space to save return address and FP
 821   stack_slots += 2 * VMRegImpl::slots_per_word;
 822 
 823   // Calculate the final stack size taking account of alignment
 824   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
 825   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 826   int lock_slot_fp_offset = stack_size - 2 * wordSize -
 827     lock_slot_offset * VMRegImpl::stack_slot_size;
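
       // Added sketch of the wrapper frame built below (word granularity,
       // highest addresses first):
       //   caller's frame
       //   saved LR
       //   saved FP                          <- FP after "mov(FP, SP)"
       //   alignment padding (if any)
       //   lock slot (synchronized only)     <- FP - lock_slot_fp_offset
       //   oop handle area (wrapped object args, plus Klass mirror if static)
       //   outgoing C argument slots         <- SP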
 828 
 829   // Unverified entry point
 830   address start = __ pc();
 831 
 832   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
 833   const Register receiver = R0; // see receiverOpr()
 834   __ load_klass(Rtemp, receiver);
 835   __ cmp(Rtemp, Ricklass);
 836   Label verified;
 837 
 838   __ b(verified, eq); // jump over alignment no-ops too
 839   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 840   __ align(CodeEntryAlignment);
 841 
 842   // Verified entry point
 843   __ bind(verified);
 844   int vep_offset = __ pc() - start;
 845 
 846 
 847   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
 848     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
 849     // instead of doing a full VM transition once it's been computed.
 850     Label slow_case;
 851     const Register obj_reg = R0;
 852 
 853     // Unlike Object.hashCode, System.identityHashCode is a static method and
 854     // gets the object as an argument instead of as the receiver.
 855     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
 856       assert(method->is_static(), "method should be static");
 857       // return 0 for null reference input, return val = R0 = obj_reg = 0
 858       __ cmp(obj_reg, 0);
 859       __ bx(LR, eq);
 860     }
 861 
 862     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 863 
 864     assert(markOopDesc::unlocked_value == 1, "adjust this code");
 865     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
 866 
 867     if (UseBiasedLocking) {
 868       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
 869       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
 870     }
 871 
 872     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
 873     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
 874     __ bx(LR, ne);
 875 
 876     __ bind(slow_case);
 877   }
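
       // Added note: the fast path above returns
       //   (mark & markOopDesc::hash_mask_in_place) >> markOopDesc::hash_shift
       // in R0 when the object is unlocked (and not biased) and a hash has
       // already been installed; a zero hash field leaves 'eq' set after the
       // bics, so we fall through to the slow case and the full native call.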
 878 
 879   // Bang stack pages
 880   __ arm_stack_overflow_check(stack_size, Rtemp);
 881 
 882   // Setup frame linkage
 883   __ raw_push(FP, LR);
 884   __ mov(FP, SP);
 885   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 886 
 887   int frame_complete = __ pc() - start;
 888 
 889   OopMapSet* oop_maps = new OopMapSet();
 890   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 891   const int extra_args = is_static ? 2 : 1;
 892   int receiver_offset = -1;
 893   int fp_regs_in_arguments = 0;
 894 
 895   for (i = total_in_args; --i >= 0; ) {
 896     switch (in_sig_bt[i]) {
 897     case T_ARRAY:
 898     case T_OBJECT: {
 899       VMReg src = in_regs[i].first();
 900       VMReg dst = out_regs[i + extra_args].first();
 901       if (src->is_stack()) {
 902         assert(dst->is_stack(), "must be");
 903         assert(i != 0, "Incoming receiver is always in a register");
 904         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 905         __ cmp(Rtemp, 0);
 906         __ add(Rtemp, FP, reg2offset_in(src), ne);
 907         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 908         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 909         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
 910       } else {
 911         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
 912         __ str(src->as_Register(), Address(SP, offset));
 913         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
 914         if ((i == 0) && (!is_static)) {
 915           receiver_offset = offset;
 916         }
 917         oop_handle_offset += VMRegImpl::slots_per_word;
 918 
 919         if (dst->is_stack()) {
 920           __ movs(Rtemp, src->as_Register());
 921           __ add(Rtemp, SP, offset, ne);
 922           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 923         } else {
 924           __ movs(dst->as_Register(), src->as_Register());
 925           __ add(dst->as_Register(), SP, offset, ne);
 926         }
 927       }
 928     }
 929 
 930     case T_VOID:
 931       break;
 932 
 933 
 934 #ifdef __SOFTFP__
 935     case T_DOUBLE:
 936 #endif
 937     case T_LONG: {
 938       VMReg src_1 = in_regs[i].first();
 939       VMReg src_2 = in_regs[i].second();
 940       VMReg dst_1 = out_regs[i + extra_args].first();
 941       VMReg dst_2 = out_regs[i + extra_args].second();
 942 #if (ALIGN_WIDE_ARGUMENTS == 0)
 943       // The C convention can mix a register and a stack slot for a
 944       // 64-bit native argument.
 945 
 946       // Note: the following code should work independently of whether
 947       // the Java calling convention follows the C convention or whether
 948       // it aligns 64-bit values.
 949       if (dst_2->is_Register()) {
 950         if (src_1->as_Register() != dst_1->as_Register()) {
 951           assert(src_1->as_Register() != dst_2->as_Register() &&
 952                  src_2->as_Register() != dst_2->as_Register(), "must be");
 953           __ mov(dst_2->as_Register(), src_2->as_Register());
 954           __ mov(dst_1->as_Register(), src_1->as_Register());
 955         } else {
 956           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 957         }
 958       } else if (src_2->is_Register()) {
 959         if (dst_1->is_Register()) {
 960           // dst mixes a register and a stack slot
 961           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 962           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
 963           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 964           __ mov(dst_1->as_Register(), src_1->as_Register());
 965         } else {
 966           // registers to stack slots
 967           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 968           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 969           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 970         }
 971       } else if (src_1->is_Register()) {
 972         if (dst_1->is_Register()) {
 973           // src and dst must be R3 + stack slot
 974           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
 975           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
 976           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
 977         } else {
 978           // <R3,stack> -> <stack,stack>
 979           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
 980           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
 981           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 982           __ str(LR, Address(SP, reg2offset_out(dst_2)));
 983         }
 984       } else {
 985         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 986         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 987         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 988         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 989         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 990       }
 991 #else // ALIGN_WIDE_ARGUMENTS
 992       if (src_1->is_stack()) {
 993         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 994         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 995         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 996         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 997         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 998       } else if (dst_1->is_stack()) {
 999         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1000         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1001         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1002       } else if (src_1->as_Register() == dst_1->as_Register()) {
1003         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1004       } else {
1005         assert(src_1->as_Register() != dst_2->as_Register() &&
1006                src_2->as_Register() != dst_2->as_Register(), "must be");
1007         __ mov(dst_2->as_Register(), src_2->as_Register());
1008         __ mov(dst_1->as_Register(), src_1->as_Register());
1009       }
1010 #endif // ALIGN_WIDE_ARGUMENTS
1011       break;
1012     }
1013 
1014 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1015     case T_FLOAT: {
1016       VMReg src = in_regs[i].first();
1017       VMReg dst = out_regs[i + extra_args].first();
1018       if (src->is_stack()) {
1019         assert(dst->is_stack(), "must be");
1020         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1021         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1022       } else if (dst->is_stack()) {
1023         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1024       } else {
1025         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1026         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1027       }
1028       break;
1029     }
1030 
1031     case T_DOUBLE: {
1032       VMReg src_1 = in_regs[i].first();
1033       VMReg src_2 = in_regs[i].second();
1034       VMReg dst_1 = out_regs[i + extra_args].first();
1035       VMReg dst_2 = out_regs[i + extra_args].second();
1036       if (src_1->is_stack()) {
1037         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1038         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1039         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1040         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1041         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1042       } else if (dst_1->is_stack()) {
1043         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1044         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1045 #if (ALIGN_WIDE_ARGUMENTS == 0)
1046       } else if (dst_2->is_stack()) {
1047         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1048         // double register must go into R3 + one stack slot
1049         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1050         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1051 #endif
1052       } else {
1053         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1054         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1055       }
1056       break;
1057     }
1058 #endif // __SOFTFP__
1059 
1060 #ifdef __ABI_HARD__
1061     case T_FLOAT: {
1062       VMReg src = in_regs[i].first();
1063       VMReg dst = out_regs[i + extra_args].first();
1064       if (src->is_stack()) {
1065         if (dst->is_stack()) {
1066           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1067           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1068         } else {
1069           // C2 Java calling convention does not populate S14 and S15, therefore
1070           // those need to be loaded from stack here
1071           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1072           fp_regs_in_arguments++;
1073         }
1074       } else {
1075         assert(src->is_FloatRegister(), "must be");
1076         fp_regs_in_arguments++;
1077       }
1078       break;
1079     }
1080     case T_DOUBLE: {
1081       VMReg src_1 = in_regs[i].first();
1082       VMReg src_2 = in_regs[i].second();
1083       VMReg dst_1 = out_regs[i + extra_args].first();
1084       VMReg dst_2 = out_regs[i + extra_args].second();
1085       if (src_1->is_stack()) {
1086         if (dst_1->is_stack()) {
1087           assert(dst_2->is_stack(), "must be");
1088           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1089           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1090           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1091           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1092         } else {
1093           // C2 Java calling convention does not populate S14 and S15, therefore
1094           // those need to be loaded from stack here
1095           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1096           fp_regs_in_arguments += 2;
1097         }
1098       } else {
1099         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1100         fp_regs_in_arguments += 2;
1101       }
1102       break;
1103     }
1104 #endif // __ABI_HARD__
1105 
1106     default: {
1107       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1108       VMReg src = in_regs[i].first();
1109       VMReg dst = out_regs[i + extra_args].first();
1110       if (src->is_stack()) {
1111         assert(dst->is_stack(), "must be");
1112         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1113         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1114       } else if (dst->is_stack()) {
1115         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1116       } else {
1117         assert(src->is_Register() && dst->is_Register(), "must be");
1118         __ mov(dst->as_Register(), src->as_Register());
1119       }
1120     }
1121     }
1122   }
1123 
1124   // Get Klass mirror
1125   int klass_offset = -1;
1126   if (is_static) {
1127     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1128     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1129     __ add(c_rarg1, SP, klass_offset);
1130     __ str(Rtemp, Address(SP, klass_offset));
1131     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1132   }
1133 
1134   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1135   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1136   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1137   oop_maps->add_gc_map(pc_offset, map);
1138 
1139   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1140   __ membar(MacroAssembler::StoreStore, Rtemp);
1141 
1142   // RedefineClasses() tracing support for obsolete method entry
1143   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1144     __ save_caller_save_registers();
1145     __ mov(R0, Rthread);
1146     __ mov_metadata(R1, method());
1147     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1148     __ restore_caller_save_registers();
1149   }
1150 
1151   const Register sync_handle = R5;
1152   const Register sync_obj    = R6;
1153   const Register disp_hdr    = altFP_7_11;
1154   const Register tmp         = R8;
1155 
1156   Label slow_lock, slow_lock_biased, lock_done, fast_lock;
1157   if (method->is_synchronized()) {
1158     // The first argument is a handle to sync object (a class or an instance)
1159     __ ldr(sync_obj, Address(R1));
1160     // Remember the handle for the unlocking code
1161     __ mov(sync_handle, R1);
1162 
1163     __ resolve(IS_NOT_NULL, sync_obj);
1164 
1165     if(UseBiasedLocking) {
1166       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1167     }
1168 
1169     const Register mark = tmp;
1170     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1171     // That would be acceptable, as either the CAS or the slow-case path is taken in that case.
1172 
1173     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1174     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1175     __ tst(mark, markOopDesc::unlocked_value);
1176     __ b(fast_lock, ne);
1177 
1178     // Check for recursive lock
1179     // See comments in InterpreterMacroAssembler::lock_object for
1180     // explanations on the fast recursive locking check.
1181     // Check independently the low bits and the distance to SP
1182     // -1- test low 2 bits
1183     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1184     // -2- test (hdr - SP) if the low two bits are 0
1185     __ sub(Rtemp, mark, SP, eq);
1186     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1187     // If still 'eq' then recursive locking OK: set displaced header to 0
1188     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1189     __ b(lock_done, eq);
1190     __ b(slow_lock);
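
         // Added note: the sequence above handles the recursive case: if the low
         // two bits of the mark are zero the object is already stack-locked, and
         // if (mark - SP) is less than one VM page the owning BasicLock is on
         // this thread's own stack, so a zero displaced header is stored and the
         // lock counts as recursive; every other combination goes to slow_lock.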
1191 
1192     __ bind(fast_lock);
1193     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1194 
1195     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1196 
1197     __ bind(lock_done);
1198   }
1199 
1200   // Get JNIEnv*
1201   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1202 
1203   // Perform thread state transition
1204   __ mov(Rtemp, _thread_in_native);
1205   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1206 
1207   // Finally, call the native method
1208   __ call(method->native_function());
1209 
1210   // Set FPSCR/FPCR to a known state
1211   if (AlwaysRestoreFPU) {
1212     __ restore_default_fp_mode();
1213   }
1214 
1215   // Ensure a Boolean result is mapped to 0..1
1216   if (ret_type == T_BOOLEAN) {
1217     __ c2bool(R0);
1218   }
1219 
1220   // Do a safepoint check while thread is in transition state
1221   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1222   Label call_safepoint_runtime, return_to_java;
1223   __ mov(Rtemp, _thread_in_native_trans);
1224   __ ldr_literal(R2, safepoint_state);
1225   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1226 
1227   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1228   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1229 
1230   __ ldr_s32(R2, Address(R2));
1231   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1232   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1233   __ cond_cmp(R3, 0, eq);
1234   __ b(call_safepoint_runtime, ne);
1235   __ bind(return_to_java);
1236 
1237   // Perform thread state transition and reguard stack yellow pages if needed
1238   Label reguard, reguard_done;
1239   __ mov(Rtemp, _thread_in_Java);
1240   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1241   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1242 
1243   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1244   __ b(reguard, eq);
1245   __ bind(reguard_done);
1246 
1247   Label slow_unlock, unlock_done;
1248   if (method->is_synchronized()) {
1249     __ ldr(sync_obj, Address(sync_handle));
1250 
1251     __ resolve(IS_NOT_NULL, sync_obj);
1252 
1253     if(UseBiasedLocking) {
1254       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1255       // disp_hdr may not have been saved on entry with biased locking
1256       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1257     }
1258 
1259     // See C1_MacroAssembler::unlock_object() for more comments
1260     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1261     __ cbz(R2, unlock_done);
1262 
1263     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1264 
1265     __ bind(unlock_done);
1266   }
1267 
1268   // Set last java frame and handle block to zero
1269   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1270   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1271 
1272   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1273   if (CheckJNICalls) {
1274     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1275   }
1276 
1277   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1278   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1279     __ resolve_jobject(R0,      // value
1280                        Rtemp,   // tmp1
1281                        R1_tmp); // tmp2
1282   }
1283 
1284   // Any exception pending?
1285   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1286   __ mov(SP, FP);
1287 
1288   __ cmp(Rtemp, 0);
1289   // Pop the frame and return if no exception pending
1290   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1291   // Pop the frame and forward the exception. Rexception_pc contains return address.
1292   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1293   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1294   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1295 
1296   // Safepoint operation and/or pending suspend request is in progress.
1297   // Save the return values and call the runtime function by hand.
1298   __ bind(call_safepoint_runtime);
1299   push_result_registers(masm, ret_type);
1300   __ mov(R0, Rthread);
1301   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1302   pop_result_registers(masm, ret_type);
1303   __ b(return_to_java);
1304 
1305   __ bind_literal(safepoint_state);
1306 
1307   // Reguard stack pages. Save native results around a call to C runtime.
1308   __ bind(reguard);
1309   push_result_registers(masm, ret_type);
1310   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1311   pop_result_registers(masm, ret_type);
1312   __ b(reguard_done);
1313 
1314   if (method->is_synchronized()) {
1315     // Locking slow case
1316     if(UseBiasedLocking) {
1317       __ bind(slow_lock_biased);
1318       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1319     }
1320 
1321     __ bind(slow_lock);
1322 
1323     push_param_registers(masm, fp_regs_in_arguments);
1324 
1325     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1326     __ mov(R0, sync_obj);
1327     __ mov(R1, disp_hdr);
1328     __ mov(R2, Rthread);
1329     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1330 
1331     pop_param_registers(masm, fp_regs_in_arguments);
1332 
1333     __ b(lock_done);
1334 
1335     // Unlocking slow case
1336     __ bind(slow_unlock);
1337 
1338     push_result_registers(masm, ret_type);
1339 
1340     // Clear pending exception before reentering VM.
1341     // Can store the oop in a register since it is a leaf call.
1342     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1343     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1344     Register zero = __ zero_register(Rtemp);
1345     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1346     __ mov(R0, sync_obj);
1347     __ mov(R1, disp_hdr);
1348     __ mov(R2, Rthread);
1349     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1350     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1351 
1352     pop_result_registers(masm, ret_type);
1353 
1354     __ b(unlock_done);
1355   }
1356 
1357   __ flush();
1358   return nmethod::new_native_nmethod(method,
1359                                      compile_id,
1360                                      masm->code(),
1361                                      vep_offset,
1362                                      frame_complete,
1363                                      stack_slots / VMRegImpl::slots_per_word,
1364                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1365                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1366                                      oop_maps);
1367 }
1368 
1369 // This function returns the adjustment size (in number of words) to a c2i adapter
1370 // activation for use during deoptimization.
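     // For example (illustrative): a callee with 2 parameters and 5 locals needs
     // (5 - 2) * Interpreter::stackElementWords extra words in the interpreter
     // frame created during deoptimization.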
1371 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1372   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1373   return extra_locals_size;
1374 }
1375 
1376 
1377 uint SharedRuntime::out_preserve_stack_slots() {
1378   return 0;
1379 }
1380 
1381 
1382 //------------------------------generate_deopt_blob----------------------------
1383 void SharedRuntime::generate_deopt_blob() {
1384   ResourceMark rm;
1385   CodeBuffer buffer("deopt_blob", 1024, 1024);
1386   int frame_size_in_words;
1387   OopMapSet* oop_maps;
1388   int reexecute_offset;
1389   int exception_in_tls_offset;
1390   int exception_offset;
1391 
1392   MacroAssembler* masm = new MacroAssembler(&buffer);
1393   Label cont;
1394   const Register Rkind   = R9; // caller-saved
1395   const Register Rublock = R6;
1396   const Register Rsender = altFP_7_11;
1397   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1398 
1399   address start = __ pc();
1400 
1401   oop_maps = new OopMapSet();
1402   // LR saved by caller (can be live in c2 method)
1403 
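       // This blob has several entry points: the default Unpack_deopt entry
       // (below), an Unpack_exception entry at exception_offset, an
       // exception-in-TLS entry at exception_in_tls_offset and an
       // Unpack_reexecute entry at reexecute_offset. Each one sets Rkind
       // accordingly and they all converge at 'cont'.
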
1404   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1405   // not possible to call the deopt blob from the nmethod and pass the
1406   // address of the nmethod's deopt handler in LR. Instead, the caller
1407   // of the deopt blob pushes the current address itself, so the deopt
1408   // blob doesn't have to. This way LR is preserved: it still contains
1409   // the live value from the nmethod and is saved at R14/R30_offset
1410   // here.
1411   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1412   __ mov(Rkind, Deoptimization::Unpack_deopt);
1413   __ b(cont);
1414 
1415   exception_offset = __ pc() - start;
1416 
1417   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1418   // exception_in_tls_offset entry point.
1419   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1420   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1421   // Force return value to NULL to avoid confusing the escape analysis
1422   // logic. Everything is dead here anyway.
1423   __ mov(R0, 0);
1424 
1425   exception_in_tls_offset = __ pc() - start;
1426 
1427   // Exception data is in JavaThread structure
1428   // Patch the return address of the current frame
1429   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1430   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1431   {
1432     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1433     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1434   }
1435   __ mov(Rkind, Deoptimization::Unpack_exception);
1436   __ b(cont);
1437 
1438   reexecute_offset = __ pc() - start;
1439 
1440   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1441   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1442 
1443   // Calculate UnrollBlock and save the result in Rublock
1444   __ bind(cont);
1445   __ mov(R0, Rthread);
1446   __ mov(R1, Rkind);
1447 
1448   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1449   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1450   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1451   if (pc_offset == -1) {
1452     pc_offset = __ offset();
1453   }
1454   oop_maps->add_gc_map(pc_offset, map);
1455   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1456 
1457   __ mov(Rublock, R0);
1458 
1459   // Reload Rkind from the UnrollBlock (might have changed)
1460   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1461   Label noException;
1462   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1463   __ b(noException, ne);
1464   // handle exception case
1465 #ifdef ASSERT
1466   // assert that exception_pc is zero in tls
1467   { Label L;
1468     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1469     __ cbz(Rexception_pc, L);
1470     __ stop("exception pc should be null");
1471     __ bind(L);
1472   }
1473 #endif
1474   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1475   __ verify_oop(Rexception_obj);
1476   {
1477     const Register Rzero = __ zero_register(Rtemp);
1478     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1479   }
1480 
1481   __ bind(noException);
1482 
1483   // This frame is going away.  Fetch return value, so we can move it to
1484   // a new frame.
1485   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1486   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1487 #ifndef __SOFTFP__
1488   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1489 #endif
1490   // pop frame
1491   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1492 
1493   // Set initial stack state before pushing interpreter frames
1494   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1495   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1496   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1497 
1498   __ add(SP, SP, Rtemp);
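       // SP now points past the deoptimized compiled frame, which has
       // effectively been popped.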
1499 
1500 #ifdef ASSERT
1501   // Compilers generate code that bangs the stack by as much as the
1502   // interpreter would need, so this stack banging should never
1503   // trigger a fault. Verify that it does not on non-product builds.
1504   // Check that there is enough stack to push the deoptimized frames.
1505   if (UseStackBanging) {
1506     // The compiled method that we are deoptimizing was popped from the stack.
1507     // If the stack bang results in a stack overflow, we don't return to the
1508     // method that is being deoptimized. The stack overflow exception is
1509     // propagated to the caller of the deoptimized method. Need to get the pc
1510     // from the caller in LR and restore FP.
1511     __ ldr(LR, Address(R2, 0));
1512     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1513     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1514     __ arm_stack_overflow_check(R8, Rtemp);
1515   }
1516 #endif
1517   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1518 
1519   // Pick up the initial fp we should save
1520   // XXX Note: was ldr(FP, Address(FP));
1521 
1522   // The compiler no longer uses FP as a frame pointer for the
1523   // compiled code. It can be used by the register allocator in C2 or to
1524   // remember the original SP for JSR292 call sites.
1525 
1526   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1527   // Deoptimization::fetch_unroll_info computes the right FP value and
1528   // stores it in Rublock.initial_info. The same mechanism is used on ARM.
1529   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1530 
1531   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1532   __ mov(Rsender, SP);
1533   __ sub(SP, SP, Rtemp);
1534 
1535   // Push interpreter frames in a loop
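       // Each iteration loads the next frame pc and frame size from the
       // UnrollBlock arrays (via R2 and R3), pushes FP and that pc to start
       // the new frame, reserves the remaining frame size below it, and
       // records the sender SP while clearing the last_sp slot.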
1536   Label loop;
1537   __ bind(loop);
1538   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1539   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1540 
1541   __ raw_push(FP, LR);                                     // create new frame
1542   __ mov(FP, SP);
1543   __ sub(Rtemp, Rtemp, 2*wordSize);
1544 
1545   __ sub(SP, SP, Rtemp);
1546 
1547   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1548   __ mov(LR, 0);
1549   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1550 
1551   __ subs(R8, R8, 1);                               // decrement counter
1552   __ mov(Rsender, SP);
1553   __ b(loop, ne);
1554 
1555   // Re-push self-frame
1556   __ ldr(LR, Address(R2));
1557   __ raw_push(FP, LR);
1558   __ mov(FP, SP);
1559   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
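       // (FP and LR were already pushed above, hence the '- 2' in the frame size.)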
1560 
1561   // Restore frame locals after moving the frame
1562   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1563   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1564 
1565 #ifndef __SOFTFP__
1566   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1567 #endif // !__SOFTFP__
1568 
1569 #ifdef ASSERT
1570   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1571   { Label L;
1572     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1573     __ cmp_32(Rkind, Rtemp);
1574     __ b(L, eq);
1575     __ stop("Rkind was overwritten");
1576     __ bind(L);
1577   }
1578 #endif
1579 
1580   // Call unpack_frames with proper arguments
1581   __ mov(R0, Rthread);
1582   __ mov(R1, Rkind);
1583 
1584   pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1585   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1586   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1587   if (pc_offset == -1) {
1588     pc_offset = __ offset();
1589   }
1590   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1591   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1592 
1593   // Collect return values, pop self-frame and jump to interpreter
1594   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1595   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1596   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
1597   // float return value registers are controlled by __ABI_HARD__.
1598   // This matters for vfp-sflt builds.
1599 #ifndef __SOFTFP__
1600   // Interpreter hard float
1601 #ifdef __ABI_HARD__
1602   // Compiler float return value in FP registers
1603   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1604 #else
1605   // Compiler float return value in integer registers,
1606   // copy to D0 for the interpreter (D0 <-- R0:R1)
1607   __ fmdrr(D0_tos, R0, R1);
1608 #endif
1609 #endif // !__SOFTFP__
1610   __ mov(SP, FP);
1611 
1612   __ pop(RegisterSet(FP) | RegisterSet(PC));
1613 
1614   __ flush();
1615 
1616   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1617                                            reexecute_offset, frame_size_in_words);
1618   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1619 }
1620 
1621 #ifdef COMPILER2
1622 
1623 //------------------------------generate_uncommon_trap_blob--------------------
1624 // Ought to generate an ideal graph & compile, but here's some hand-coded
1625 // ARM assembly instead.
1626 void SharedRuntime::generate_uncommon_trap_blob() {
1627   // allocate space for the code
1628   ResourceMark rm;
1629 
1630   // setup code generation tools
1631   int pad = VerifyThread ? 512 : 0;
1632 #ifdef _LP64
1633   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1634 #else
1635   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1636   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1637   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1638 #endif
1639   // bypassed when code generation is useless
1640   MacroAssembler* masm               = new MacroAssembler(&buffer);
1641   const Register Rublock = R6;
1642   const Register Rsender = altFP_7_11;
1643   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
1644 
1645   //
1646   // This is the entry point for all traps the compiler takes when it thinks
1647   // it cannot handle further execution of compiled code. The frame is
1648   // deoptimized in these cases and converted into interpreter frames for
1649   // execution.
1650   // The steps taken by this blob are as follows:
1651   //   - push a fake "unpack_frame"
1652   //   - call the C routine Deoptimization::uncommon_trap (this function
1653   //     packs the current compiled frame into vframe arrays and returns
1654   //     information about the number and size of interpreter frames which
1655   //     are equivalent to the frame which is being deoptimized)
1656   //   - deallocate the "unpack_frame"
1657   //   - deallocate the deoptimization frame
1658   //   - in a loop using the information returned in the previous step
1659   //     push interpreter frames;
1660   //   - create a dummy "unpack_frame"
1661   //   - call the C routine: Deoptimization::unpack_frames (this function
1662   //     lays out values on the interpreter frame which was just created)
1663   //   - deallocate the dummy unpack_frame
1664   //   - return to the interpreter entry point
1665   //
1666   //  Refer to the following methods for more information:
1667   //   - Deoptimization::uncommon_trap
1668   //   - Deoptimization::unpack_frames
1669 
1670   // the unloaded class index is in R0 (first parameter to this blob)
1671 
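       // Push a fake "unpack_frame" (just FP and LR), then call
       // Deoptimization::uncommon_trap with the thread, the value passed in
       // R0, and Unpack_uncommon_trap; the returned UnrollBlock* ends up in
       // Rublock.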
1672   __ raw_push(FP, LR);
1673   __ set_last_Java_frame(SP, FP, false, Rtemp);
1674   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
1675   __ mov(R1, R0);
1676   __ mov(R0, Rthread);
1677   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
1678   __ mov(Rublock, R0);
1679   __ reset_last_Java_frame(Rtemp);
1680   __ raw_pop(FP, LR);
1681 
1682 #ifdef ASSERT
1683   { Label L;
1684     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1685     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1686     __ b(L, eq);
1687     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1688     __ bind(L);
1689   }
1690 #endif
1691 
1692 
1693   // Set initial stack state before pushing interpreter frames
1694   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1695   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1696   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1697 
1698   __ add(SP, SP, Rtemp);
1699 
1700   // Check that there is enough stack to push the deoptimized frames.
1701 #ifdef ASSERT
1702   // Compilers generate code that bangs the stack by as much as the
1703   // interpreter would need, so this stack banging should never
1704   // trigger a fault. Verify that it does not on non-product builds.
1705   if (UseStackBanging) {
1706     // The compiled method that we are deoptimizing was popped from the stack.
1707     // If the stack bang results in a stack overflow, we don't return to the
1708     // method that is being deoptimized. The stack overflow exception is
1709     // propagated to the caller of the deoptimized method. Need to get the pc
1710     // from the caller in LR and restore FP.
1711     __ ldr(LR, Address(R2, 0));
1712     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1713     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1714     __ arm_stack_overflow_check(R8, Rtemp);
1715   }
1716 #endif
1717   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1718   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1719   __ mov(Rsender, SP);
1720   __ sub(SP, SP, Rtemp);
1721   //  __ ldr(FP, Address(FP));
1722   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1723 
1724   // Push interpreter frames in a loop
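       // (Same frame-pushing loop as in generate_deopt_blob() above.)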
1725   Label loop;
1726   __ bind(loop);
1727   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1728   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1729 
1730   __ raw_push(FP, LR);                                     // create new frame
1731   __ mov(FP, SP);
1732   __ sub(Rtemp, Rtemp, 2*wordSize);
1733 
1734   __ sub(SP, SP, Rtemp);
1735 
1736   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1737   __ mov(LR, 0);
1738   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1739   __ subs(R8, R8, 1);                               // decrement counter
1740   __ mov(Rsender, SP);
1741   __ b(loop, ne);
1742 
1743   // Re-push self-frame
1744   __ ldr(LR, Address(R2));
1745   __ raw_push(FP, LR);
1746   __ mov(FP, SP);
1747 
1748   // Call unpack_frames with proper arguments
1749   __ mov(R0, Rthread);
1750   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1751   __ set_last_Java_frame(SP, FP, true, Rtemp);
1752   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1753   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1754   __ reset_last_Java_frame(Rtemp);
1755 
1756   __ mov(SP, FP);
1757   __ pop(RegisterSet(FP) | RegisterSet(PC));
1758 
1759   masm->flush();
1760   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
1761 }
1762 
1763 #endif // COMPILER2
1764 
1765 //------------------------------generate_handler_blob------
1766 //
1767 // Generate a special Compile2Runtime blob that saves all registers, sets
1768 // up an oopmap, and calls safepoint code to stop the compiled code for
1769 // a safepoint.
1770 //
1771 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1772   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1773 
1774   ResourceMark rm;
1775   CodeBuffer buffer("handler_blob", 256, 256);
1776   int frame_size_words;
1777   OopMapSet* oop_maps;
1778 
1779   bool cause_return = (poll_type == POLL_AT_RETURN);
1780 
1781   MacroAssembler* masm = new MacroAssembler(&buffer);
1782   address start = __ pc();
1783   oop_maps = new OopMapSet();
1784 
1785   if (!cause_return) {
1786     __ sub(SP, SP, 4); // make room for LR which may still be live
1787                        // here if we are coming from a c2 method
1788   }
1789 
1790   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
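       // At a return-point poll (cause_return) LR already holds the correct
       // return pc. For other polls the pc of the polling instruction was
       // stashed in the thread as saved_exception_pc and is patched into the
       // saved frame below.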
1791   if (!cause_return) {
1792     // update saved PC with correct value
1793     // need 2 steps because LR can be live in c2 method
1794     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1795     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1796   }
1797 
1798   __ mov(R0, Rthread);
1799   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1800   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1801   __ call(call_ptr);
1802   if (pc_offset == -1) {
1803     pc_offset = __ offset();
1804   }
1805   oop_maps->add_gc_map(pc_offset, map);
1806   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1807 
1808   // Check for pending exception
1809   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1810   __ cmp(Rtemp, 0);
1811 
1812   if (!cause_return) {
1813     RegisterSaver::restore_live_registers(masm, false);
1814     __ pop(PC, eq);
1815     __ pop(Rexception_pc);
1816   } else {
1817     RegisterSaver::restore_live_registers(masm);
1818     __ bx(LR, eq);
1819     __ mov(Rexception_pc, LR);
1820   }
1821 
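       // A pending exception was found: Rexception_pc holds the pc at which
       // to rethrow, so forward to the exception handling stub.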
1822   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1823 
1824   __ flush();
1825 
1826   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1827 }
1828 
1829 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1830   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1831 
1832   ResourceMark rm;
1833   CodeBuffer buffer(name, 1000, 512);
1834   int frame_size_words;
1835   OopMapSet *oop_maps;
1836   int frame_complete;
1837 
1838   MacroAssembler* masm = new MacroAssembler(&buffer);
1839   Label pending_exception;
1840 
1841   int start = __ offset();
1842 
1843   oop_maps = new OopMapSet();
1844   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
1845 
1846   frame_complete = __ offset();
1847 
1848   __ mov(R0, Rthread);
1849 
1850   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1851   assert(start == 0, "warning: start differs from code_begin");
1852   __ call(destination);
1853   if (pc_offset == -1) {
1854     pc_offset = __ offset();
1855   }
1856   oop_maps->add_gc_map(pc_offset, map);
1857   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1858 
1859   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
1860   __ cbnz(R1, pending_exception);
1861 
1862   // Overwrite saved register values
1863 
1864   // Place metadata result of VM call into Rmethod
1865   __ get_vm_result_2(R1, Rtemp);
1866   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
1867 
1868   // Place target address (VM call result) into Rtemp
1869   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
1870 
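       // restore_live_registers below reloads these slots, so Rmethod ends up
       // holding the Method* returned by the VM and Rtemp the resolved entry
       // point, which we then jump to.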
1871   RegisterSaver::restore_live_registers(masm);
1872   __ jump(Rtemp);
1873 
1874   __ bind(pending_exception);
1875 
1876   RegisterSaver::restore_live_registers(masm);
1877   const Register Rzero = __ zero_register(Rtemp);
1878   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
1879   __ mov(Rexception_pc, LR);
1880   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1881 
1882   __ flush();
1883 
1884   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1885 }