1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "oops/klass.inline.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/vframeArray.hpp"
  38 #include "utilities/align.hpp"
  39 #include "vmreg_arm.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #endif
  46 
  47 #define __ masm->
  48 
  49 class RegisterSaver {
  50 public:
  51 
  52   // Special registers:
  53   //              32-bit ARM     64-bit ARM
  54   //  Rthread:       R10            R28
  55   //  LR:            R14            R30
  56 
  57   // Rthread is callee saved in the C ABI and never changed by compiled code:
  58   // no need to save it.
  59 
  60 // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  61   // The one at LR_offset is a return address that is needed by stack walking.
  62   // A c2 method uses LR as a standard register so it may be live when we
  63   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  64   // in case it's live in the method we are coming from.
  65 
  66 
  67   enum RegisterLayout {
  68     fpu_save_size = FloatRegisterImpl::number_of_registers,
  69 #ifndef __SOFTFP__
  70     D0_offset = 0,
  71 #endif
  72     R0_offset = fpu_save_size,
  73     R1_offset,
  74     R2_offset,
  75     R3_offset,
  76     R4_offset,
  77     R5_offset,
  78     R6_offset,
  79 #if (FP_REG_NUM != 7)
  80     // if not saved as FP
  81     R7_offset,
  82 #endif
  83     R8_offset,
  84     R9_offset,
  85 #if (FP_REG_NUM != 11)
  86     // if not saved as FP
  87     R11_offset,
  88 #endif
  89     R12_offset,
  90     R14_offset,
  91     FP_offset,
  92     LR_offset,
  93     reg_save_size,
  94 
  95     Rmethod_offset = R9_offset,
  96     Rtemp_offset = R12_offset,
  97   };
  98 
  99   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
 100   // (altFP_7_11 is whichever of R7 and R11 is not used as FP)
 101 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 102 
 103 
 104   //  When LR may be live in the nmethod we are coming from,
 105   //  lr_saved is true: the caller has already saved the return address
 106   //  before calling save_live_registers, and LR still contains the
 107   //  live value.
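       //
       //  Illustrative call patterns (see generate_deopt_blob later in this file):
       //    RegisterSaver::save_live_registers(masm, &frame_size_in_words);        // this call pushes LR itself
       //    RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);  // the caller has already pushed the return address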
 108 
 109   static OopMap* save_live_registers(MacroAssembler* masm,
 110                                      int* total_frame_words,
 111                                      bool lr_saved = false);
 112   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 113 
 114 };
 115 
 116 
 117 
 118 
 119 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 120                                            int* total_frame_words,
 121                                            bool lr_saved) {
 122   *total_frame_words = reg_save_size;
 123 
 124   OopMapSet *oop_maps = new OopMapSet();
 125   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 126 
 127   if (lr_saved) {
 128     __ push(RegisterSet(FP));
 129   } else {
 130     __ push(RegisterSet(FP) | RegisterSet(LR));
 131   }
 132   __ push(SAVED_BASE_REGS);
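       // Save the floating-point registers. With VFPv3-D32 all 32 D registers are saved
       // (D16-D31 first, then D0-D15); otherwise only D0-D15 are saved, and without any
       // VFP support we merely reserve the equivalent stack space.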
 133   if (HaveVFP) {
 134     if (VM_Version::has_vfp3_32()) {
 135       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 136     } else {
 137       if (FloatRegisterImpl::number_of_registers > 32) {
 138         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 139         __ sub(SP, SP, 32 * wordSize);
 140       }
 141     }
 142     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 143   } else {
 144     __ sub(SP, SP, fpu_save_size * wordSize);
 145   }
 146 
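       // Record in the OopMap which stack slot each saved register occupies, so that
       // stack walkers (GC, deoptimization) can find the callee-saved values.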
 147   int i;
 148   int j=0;
 149   for (i = R0_offset; i <= R9_offset; i++) {
 150     if (j == FP_REG_NUM) {
 151       // skip the FP register, managed below.
 152       j++;
 153     }
 154     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 155     j++;
 156   }
 157   assert(j == R10->encoding(), "must be");
 158 #if (FP_REG_NUM != 11)
 159   // add R11, if not managed as FP
 160   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 161 #endif
 162   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 163   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 164   if (HaveVFP) {
 165     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 166       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 167       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 168     }
 169   }
 170 
 171   return map;
 172 }
 173 
 174 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 175   if (HaveVFP) {
 176     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 177     if (VM_Version::has_vfp3_32()) {
 178       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 179     } else {
 180       if (FloatRegisterImpl::number_of_registers > 32) {
 181         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 182         __ add(SP, SP, 32 * wordSize);
 183       }
 184     }
 185   } else {
 186     __ add(SP, SP, fpu_save_size * wordSize);
 187   }
 188   __ pop(SAVED_BASE_REGS);
 189   if (restore_lr) {
 190     __ pop(RegisterSet(FP) | RegisterSet(LR));
 191   } else {
 192     __ pop(RegisterSet(FP));
 193   }
 194 }
 195 
 196 
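     // Helpers that preserve the native call's result around a runtime call:
     // D0 for float/double results under the hard-float ABI, otherwise the R0/R1 pair.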
 197 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 198 #ifdef __ABI_HARD__
 199   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 200     __ sub(SP, SP, 8);
 201     __ fstd(D0, Address(SP));
 202     return;
 203   }
 204 #endif // __ABI_HARD__
 205   __ raw_push(R0, R1);
 206 }
 207 
 208 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 209 #ifdef __ABI_HARD__
 210   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 211     __ fldd(D0, Address(SP));
 212     __ add(SP, SP, 8);
 213     return;
 214   }
 215 #endif // __ABI_HARD__
 216   __ raw_pop(R0, R1);
 217 }
 218 
 219 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 220   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 221   __ push(RegisterSet(R0, R3));
 222 
 223 #ifdef __ABI_HARD__
 224   // preserve arguments
 225   // Likely not needed, as the locking code probably won't modify the volatile FP registers,
 226   // but there is no way to guarantee that.
 227   if (fp_regs_in_arguments) {
 228     // convert fp_regs_in_arguments to a number of double registers
 229     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 230     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 231   }
 232 #endif // __ABI_HARD__
 233 }
 234 
 235 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 236 #ifdef __ABI_HARD__
 237   if (fp_regs_in_arguments) {
 238     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 239     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 240   }
 241 #endif // __ABI_HARD__
 242 
 243   __ pop(RegisterSet(R0, R3));
 244 }
 245 
 246 
 247 
 248 // Is the vector's size (in bytes) bigger than the size saved by default?
 249 // All vector registers are saved by default on ARM.
 250 bool SharedRuntime::is_wide_vector(int size) {
 251   return false;
 252 }
 253 
 254 size_t SharedRuntime::trampoline_size() {
 255   return 16;
 256 }
 257 
 258 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 259   InlinedAddress dest(destination);
 260   __ indirect_jump(dest, Rtemp);
 261   __ bind_literal(dest);
 262 }
 263 
 264 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 265                                         VMRegPair *regs,
 266                                         VMRegPair *regs2,
 267                                         int total_args_passed) {
 268   assert(regs2 == NULL, "not needed on arm");
 269 
 270   int slot = 0;
 271   int ireg = 0;
 272 #ifdef __ABI_HARD__
 273   int fp_slot = 0;
 274   int single_fpr_slot = 0;
 275 #endif // __ABI_HARD__
 276   for (int i = 0; i < total_args_passed; i++) {
 277     switch (sig_bt[i]) {
 278     case T_SHORT:
 279     case T_CHAR:
 280     case T_BYTE:
 281     case T_BOOLEAN:
 282     case T_INT:
 283     case T_ARRAY:
 284     case T_OBJECT:
 285     case T_ADDRESS:
 286     case T_METADATA:
 287 #ifndef __ABI_HARD__
 288     case T_FLOAT:
 289 #endif // !__ABI_HARD__
 290       if (ireg < 4) {
 291         Register r = as_Register(ireg);
 292         regs[i].set1(r->as_VMReg());
 293         ireg++;
 294       } else {
 295         regs[i].set1(VMRegImpl::stack2reg(slot));
 296         slot++;
 297       }
 298       break;
 299     case T_LONG:
 300 #ifndef __ABI_HARD__
 301     case T_DOUBLE:
 302 #endif // !__ABI_HARD__
 303       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 304       if (ireg <= 2) {
 305 #if (ALIGN_WIDE_ARGUMENTS == 1)
 306         if(ireg & 1) ireg++;  // Aligned location required
 307 #endif
 308         Register r1 = as_Register(ireg);
 309         Register r2 = as_Register(ireg + 1);
 310         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 311         ireg += 2;
 312 #if (ALIGN_WIDE_ARGUMENTS == 0)
 313       } else if (ireg == 3) {
 314         // uses R3 + one stack slot
 315         Register r = as_Register(ireg);
 316         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 317         ireg += 1;
 318         slot += 1;
 319 #endif
 320       } else {
 321         if (slot & 1) slot++; // Aligned location required
 322         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 323         slot += 2;
 324         ireg = 4;
 325       }
 326       break;
 327     case T_VOID:
 328       regs[i].set_bad();
 329       break;
 330 #ifdef __ABI_HARD__
 331     case T_FLOAT:
 332       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 333         if ((single_fpr_slot & 1) == 0) {
 334           single_fpr_slot = fp_slot;
 335           fp_slot += 2;
 336         }
 337         FloatRegister r = as_FloatRegister(single_fpr_slot);
 338         single_fpr_slot++;
 339         regs[i].set1(r->as_VMReg());
 340       } else {
 341         regs[i].set1(VMRegImpl::stack2reg(slot));
 342         slot++;
 343       }
 344       break;
 345     case T_DOUBLE:
 346       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 347       if (fp_slot <= 14) {
 348         FloatRegister r1 = as_FloatRegister(fp_slot);
 349         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 350         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 351         fp_slot += 2;
 352       } else {
 353         if(slot & 1) slot++;
 354         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 355         slot += 2;
 356         single_fpr_slot = 16;
 357       }
 358       break;
 359 #endif // __ABI_HARD__
 360     default:
 361       ShouldNotReachHere();
 362     }
 363   }
 364   return slot;
 365 }
 366 
 367 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 368                                            VMRegPair *regs,
 369                                            int total_args_passed,
 370                                            int is_outgoing) {
 371 #ifdef __SOFTFP__
 372   // soft float is the same as the C calling convention.
 373   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 374 #endif // __SOFTFP__
 375   (void) is_outgoing;
 376   int slot = 0;
 377   int ireg = 0;
 378   int freg = 0;
 379   int single_fpr = 0;
 380 
 381   for (int i = 0; i < total_args_passed; i++) {
 382     switch (sig_bt[i]) {
 383     case T_SHORT:
 384     case T_CHAR:
 385     case T_BYTE:
 386     case T_BOOLEAN:
 387     case T_INT:
 388     case T_ARRAY:
 389     case T_OBJECT:
 390     case T_ADDRESS:
 391       if (ireg < 4) {
 392         Register r = as_Register(ireg++);
 393         regs[i].set1(r->as_VMReg());
 394       } else {
 395         regs[i].set1(VMRegImpl::stack2reg(slot++));
 396       }
 397       break;
 398     case T_FLOAT:
 399       // C2 utilizes S14/S15 for mem-mem moves
 400       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 401         if ((single_fpr & 1) == 0) {
 402           single_fpr = freg;
 403           freg += 2;
 404         }
 405         FloatRegister r = as_FloatRegister(single_fpr++);
 406         regs[i].set1(r->as_VMReg());
 407       } else {
 408         regs[i].set1(VMRegImpl::stack2reg(slot++));
 409       }
 410       break;
 411     case T_DOUBLE:
 412       // C2 utilizes S14/S15 for mem-mem moves
 413       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 414         FloatRegister r1 = as_FloatRegister(freg);
 415         FloatRegister r2 = as_FloatRegister(freg + 1);
 416         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 417         freg += 2;
 418       } else {
 419         // Keep internally the aligned calling convention,
 420         // ignoring ALIGN_WIDE_ARGUMENTS
 421         if (slot & 1) slot++;
 422         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 423         slot += 2;
 424         single_fpr = 16;
 425       }
 426       break;
 427     case T_LONG:
 428       // Keep internally the aligned calling convention,
 429       // ignoring ALIGN_WIDE_ARGUMENTS
 430       if (ireg <= 2) {
 431         if (ireg & 1) ireg++;
 432         Register r1 = as_Register(ireg);
 433         Register r2 = as_Register(ireg + 1);
 434         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 435         ireg += 2;
 436       } else {
 437         if (slot & 1) slot++;
 438         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 439         slot += 2;
 440         ireg = 4;
 441       }
 442       break;
 443     case T_VOID:
 444       regs[i].set_bad();
 445       break;
 446     default:
 447       ShouldNotReachHere();
 448     }
 449   }
 450 
 451   if (slot & 1) slot++;
 452   return slot;
 453 }
 454 
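     // If the callee now has compiled code (Method::code() != NULL), call into the runtime
     // to patch the caller's call site so future calls go directly to the compiled entry;
     // otherwise skip the fixup.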
 455 static void patch_callers_callsite(MacroAssembler *masm) {
 456   Label skip;
 457 
 458   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 459   __ cbz(Rtemp, skip);
 460 
 461   // Pushing an even number of registers for stack alignment.
 462   // Selecting R9, which had to be saved anyway for some platforms.
 463   __ push(RegisterSet(R0, R3) | R9 | LR);
 464 
 465   __ mov(R0, Rmethod);
 466   __ mov(R1, LR);
 467   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 468 
 469   __ pop(RegisterSet(R0, R3) | R9 | LR);
 470 
 471   __ bind(skip);
 472 }
 473 
 474 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 475                                     int total_args_passed, int comp_args_on_stack,
 476                                     const BasicType *sig_bt, const VMRegPair *regs) {
 477   // TODO: ARM - Maybe we can use ldm to load the arguments
 478   const Register tmp = Rtemp; // avoid erasing R5_mh
 479 
 480   // The next assert may not be needed, but it is safer. Extra analysis is required
 481   // if there are not enough free registers and we need to use R5 here.
 482   assert_different_registers(tmp, R5_mh);
 483 
 484   // 6243940 We might end up in handle_wrong_method if
 485   // the callee is deoptimized as we race through here. If that
 486   // happens we don't want to take a safepoint because the
 487   // caller frame will look interpreted and arguments are now
 488   // "compiled" so it is much better to make this transition
 489   // invisible to the stack walking code. Unfortunately if
 490   // we try and find the callee by normal means a safepoint
 491   // is possible. So we stash the desired callee in the thread
 492   // and the VM will find it there should this case occur.
 493   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 494   __ str(Rmethod, callee_target_addr);
 495 
 496 
 497   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 498 
 499   const Register initial_sp = Rmethod; // temporarily scratched
 500 
 501   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 502   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 503 
 504   __ mov(initial_sp, SP);
 505 
 506   if (comp_args_on_stack) {
 507     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 508   }
 509   __ bic(SP, SP, StackAlignmentInBytes - 1);
 510 
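       // Copy each argument from the interpreter's expression stack (addressed via initial_sp)
       // into its slot or register in the compiled calling convention.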
 511   for (int i = 0; i < total_args_passed; i++) {
 512     if (sig_bt[i] == T_VOID) {
 513       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 514       continue;
 515     }
 516     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 517     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 518 
 519     VMReg r_1 = regs[i].first();
 520     VMReg r_2 = regs[i].second();
 521     if (r_1->is_stack()) {
 522       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 523       if (!r_2->is_valid()) {
 524         __ ldr(tmp, Address(initial_sp, arg_offset));
 525         __ str(tmp, Address(SP, stack_offset));
 526       } else {
 527         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 528         __ str(tmp, Address(SP, stack_offset));
 529         __ ldr(tmp, Address(initial_sp, arg_offset));
 530         __ str(tmp, Address(SP, stack_offset + wordSize));
 531       }
 532     } else if (r_1->is_Register()) {
 533       if (!r_2->is_valid()) {
 534         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 535       } else {
 536         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 537         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 538       }
 539     } else if (r_1->is_FloatRegister()) {
 540 #ifdef __SOFTFP__
 541       ShouldNotReachHere();
 542 #endif // __SOFTFP__
 543       if (!r_2->is_valid()) {
 544         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 545       } else {
 546         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 547       }
 548     } else {
 549       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 550     }
 551   }
 552 
 553   // restore Rmethod (scratched for initial_sp)
 554   __ ldr(Rmethod, callee_target_addr);
 555   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 556 
 557 }
 558 
 559 static void gen_c2i_adapter(MacroAssembler *masm,
 560                             int total_args_passed,  int comp_args_on_stack,
 561                             const BasicType *sig_bt, const VMRegPair *regs,
 562                             Label& skip_fixup) {
 563   // TODO: ARM - Maybe we can use stm to store the arguments
 564   const Register tmp = Rtemp;
 565 
 566   patch_callers_callsite(masm);
 567   __ bind(skip_fixup);
 568 
 569   __ mov(Rsender_sp, SP); // not yet saved
 570 
 571 
 572   int extraspace = total_args_passed * Interpreter::stackElementSize;
 573   if (extraspace) {
 574     __ sub_slow(SP, SP, extraspace);
 575   }
 576 
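       // Copy each argument from the compiled calling convention into the interpreter's
       // expression-stack layout that was just allocated below SP.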
 577   for (int i = 0; i < total_args_passed; i++) {
 578     if (sig_bt[i] == T_VOID) {
 579       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 580       continue;
 581     }
 582     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 583 
 584     VMReg r_1 = regs[i].first();
 585     VMReg r_2 = regs[i].second();
 586     if (r_1->is_stack()) {
 587       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 588       if (!r_2->is_valid()) {
 589         __ ldr(tmp, Address(SP, arg_offset));
 590         __ str(tmp, Address(SP, stack_offset));
 591       } else {
 592         __ ldr(tmp, Address(SP, arg_offset));
 593         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 594         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 595         __ str(tmp, Address(SP, stack_offset));
 596       }
 597     } else if (r_1->is_Register()) {
 598       if (!r_2->is_valid()) {
 599         __ str(r_1->as_Register(), Address(SP, stack_offset));
 600       } else {
 601         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 602         __ str(r_2->as_Register(), Address(SP, stack_offset));
 603       }
 604     } else if (r_1->is_FloatRegister()) {
 605 #ifdef __SOFTFP__
 606       ShouldNotReachHere();
 607 #endif // __SOFTFP__
 608       if (!r_2->is_valid()) {
 609         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 610       } else {
 611         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 612       }
 613     } else {
 614       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 615     }
 616   }
 617 
 618   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 619 
 620 }
 621 
 622 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 623                                                             int total_args_passed,
 624                                                             int comp_args_on_stack,
 625                                                             const BasicType *sig_bt,
 626                                                             const VMRegPair *regs,
 627                                                             AdapterFingerPrint* fingerprint) {
 628   address i2c_entry = __ pc();
 629   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 630 
 631   address c2i_unverified_entry = __ pc();
 632   Label skip_fixup;
 633   const Register receiver       = R0;
 634   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 635   const Register receiver_klass = R4;
 636 
 637   __ load_klass(receiver_klass, receiver);
 638   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 639   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 640   __ cmp(receiver_klass, holder_klass);
 641 
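       // On an inline cache hit with no compiled code installed yet, fall into the c2i
       // adapter via skip_fixup; on a miss (or if compiled code now exists) go through
       // the IC miss stub so the call site gets re-resolved.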
 642   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 643   __ cmp(Rtemp, 0, eq);
 644   __ b(skip_fixup, eq);
 645   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
 646 
 647   address c2i_entry = __ pc();
 648   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 649 
 650   __ flush();
 651   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 652 }
 653 
 654 
 655 static int reg2offset_in(VMReg r) {
 656   // Account for saved FP and LR
 657   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
 658 }
 659 
 660 static int reg2offset_out(VMReg r) {
 661   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 662 }
 663 
 664 
 665 static void verify_oop_args(MacroAssembler* masm,
 666                             const methodHandle& method,
 667                             const BasicType* sig_bt,
 668                             const VMRegPair* regs) {
 669   Register temp_reg = Rmethod;  // not part of any compiled calling seq
 670   if (VerifyOops) {
 671     for (int i = 0; i < method->size_of_parameters(); i++) {
 672       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 673         VMReg r = regs[i].first();
 674         assert(r->is_valid(), "bad oop arg");
 675         if (r->is_stack()) {
 676           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 677           __ verify_oop(temp_reg);
 678         } else {
 679           __ verify_oop(r->as_Register());
 680         }
 681       }
 682     }
 683   }
 684 }
 685 
 686 static void gen_special_dispatch(MacroAssembler* masm,
 687                                  const methodHandle& method,
 688                                  const BasicType* sig_bt,
 689                                  const VMRegPair* regs) {
 690   verify_oop_args(masm, method, sig_bt, regs);
 691   vmIntrinsics::ID iid = method->intrinsic_id();
 692 
 693   // Now write the args into the outgoing interpreter space
 694   bool     has_receiver   = false;
 695   Register receiver_reg   = noreg;
 696   int      member_arg_pos = -1;
 697   Register member_reg     = noreg;
 698   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
 699   if (ref_kind != 0) {
 700     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
 701     member_reg = Rmethod;  // known to be free at this point
 702     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 703   } else if (iid == vmIntrinsics::_invokeBasic) {
 704     has_receiver = true;
 705   } else {
 706     fatal("unexpected intrinsic id %d", iid);
 707   }
 708 
 709   if (member_reg != noreg) {
 710     // Load the member_arg into register, if necessary.
 711     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
 712     VMReg r = regs[member_arg_pos].first();
 713     if (r->is_stack()) {
 714       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 715     } else {
 716       // no data motion is needed
 717       member_reg = r->as_Register();
 718     }
 719   }
 720 
 721   if (has_receiver) {
 722     // Make sure the receiver is loaded into a register.
 723     assert(method->size_of_parameters() > 0, "oob");
 724     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 725     VMReg r = regs[0].first();
 726     assert(r->is_valid(), "bad receiver arg");
 727     if (r->is_stack()) {
 728       // Porting note:  This assumes that compiled calling conventions always
 729       // pass the receiver oop in a register.  If this is not true on some
 730       // platform, pick a temp and load the receiver from stack.
 731       assert(false, "receiver always in a register");
 732       receiver_reg = j_rarg0;  // known to be free at this point
 733       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 734     } else {
 735       // no data motion is needed
 736       receiver_reg = r->as_Register();
 737     }
 738   }
 739 
 740   // Figure out which address we are really jumping to:
 741   MethodHandles::generate_method_handle_dispatch(masm, iid,
 742                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
 743 }
 744 
 745 // ---------------------------------------------------------------------------
 746 // Generate a native wrapper for a given method.  The method takes arguments
 747 // in the Java compiled code convention, marshals them to the native
 748 // convention (handlizes oops, etc), transitions to native, makes the call,
 749 // returns to java state (possibly blocking), unhandlizes any result and
 750 // returns.
 751 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 752                                                 const methodHandle& method,
 753                                                 int compile_id,
 754                                                 BasicType* in_sig_bt,
 755                                                 VMRegPair* in_regs,
 756                                                 BasicType ret_type,
 757                                                 address critical_entry) {
 758   if (method->is_method_handle_intrinsic()) {
 759     vmIntrinsics::ID iid = method->intrinsic_id();
 760     intptr_t start = (intptr_t)__ pc();
 761     int vep_offset = ((intptr_t)__ pc()) - start;
 762     gen_special_dispatch(masm,
 763                          method,
 764                          in_sig_bt,
 765                          in_regs);
 766     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
 767     __ flush();
 768     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
 769     return nmethod::new_native_nmethod(method,
 770                                        compile_id,
 771                                        masm->code(),
 772                                        vep_offset,
 773                                        frame_complete,
 774                                        stack_slots / VMRegImpl::slots_per_word,
 775                                        in_ByteSize(-1),
 776                                        in_ByteSize(-1),
 777                                        (OopMapSet*)NULL);
 778   }
 779   // Arguments for JNI method include JNIEnv and Class if static
 780 
 781   // Usage of Rtemp should be OK since scratched by native call
 782 
 783   bool is_static = method->is_static();
 784 
 785   const int total_in_args = method->size_of_parameters();
 786   int total_c_args = total_in_args + 1;
 787   if (is_static) {
 788     total_c_args++;
 789   }
 790 
 791   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
 792   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
 793 
 794   int argc = 0;
 795   out_sig_bt[argc++] = T_ADDRESS;
 796   if (is_static) {
 797     out_sig_bt[argc++] = T_OBJECT;
 798   }
 799 
 800   int i;
 801   for (i = 0; i < total_in_args; i++) {
 802     out_sig_bt[argc++] = in_sig_bt[i];
 803   }
 804 
 805   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
 806   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 807   // Since object arguments need to be wrapped, we must preserve space
 808   // for those object arguments which come in registers (GPR_PARAMS maximum)
 809   // plus one more slot for Klass handle (for static methods)
 810   int oop_handle_offset = stack_slots;
 811   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
 812 
 813   // Plus a lock if needed
 814   int lock_slot_offset = 0;
 815   if (method->is_synchronized()) {
 816     lock_slot_offset = stack_slots;
 817     assert(sizeof(BasicLock) == wordSize, "adjust this code");
 818     stack_slots += VMRegImpl::slots_per_word;
 819   }
 820 
 821   // Space to save return address and FP
 822   stack_slots += 2 * VMRegImpl::slots_per_word;
 823 
 824   // Calculate the final stack size taking account of alignment
 825   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
 826   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 827   int lock_slot_fp_offset = stack_size - 2 * wordSize -
 828     lock_slot_offset * VMRegImpl::stack_slot_size;
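       // lock_slot_fp_offset is the distance from FP (set up below) down to the BasicLock
       // slot, i.e. FP - lock_slot_fp_offset addresses the lock slot in the new frame.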
 829 
 830   // Unverified entry point
 831   address start = __ pc();
 832 
 833   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
 834   const Register receiver = R0; // see receiverOpr()
 835   __ load_klass(Rtemp, receiver);
 836   __ cmp(Rtemp, Ricklass);
 837   Label verified;
 838 
 839   __ b(verified, eq); // jump over alignment no-ops too
 840   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 841   __ align(CodeEntryAlignment);
 842 
 843   // Verified entry point
 844   __ bind(verified);
 845   int vep_offset = __ pc() - start;
 846 
 847 
 848   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
 849     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
 850     // instead of doing a full VM transition once it's been computed.
 851     Label slow_case;
 852     const Register obj_reg = R0;
 853 
 854     // Unlike Object.hashCode, System.identityHashCode is a static method and
 855     // gets the object as an argument instead of as the receiver.
 856     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
 857       assert(method->is_static(), "method should be static");
 858       // return 0 for null reference input, return val = R0 = obj_reg = 0
 859       __ cmp(obj_reg, 0);
 860       __ bx(LR, eq);
 861     }
 862 
 863     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 864 
 865     assert(markWord::unlocked_value == 1, "adjust this code");
 866     __ tbz(Rtemp, exact_log2(markWord::unlocked_value), slow_case);
 867 
 868     if (UseBiasedLocking) {
 869       assert(is_power_of_2(markWord::biased_lock_bit_in_place), "adjust this code");
 870       __ tbnz(Rtemp, exact_log2(markWord::biased_lock_bit_in_place), slow_case);
 871     }
 872 
 873     __ bics(Rtemp, Rtemp, ~markWord::hash_mask_in_place);
 874     __ mov(R0, AsmOperand(Rtemp, lsr, markWord::hash_shift), ne);
 875     __ bx(LR, ne);
 876 
 877     __ bind(slow_case);
 878   }
 879 
 880   // Bang stack pages
 881   __ arm_stack_overflow_check(stack_size, Rtemp);
 882 
 883   // Setup frame linkage
 884   __ raw_push(FP, LR);
 885   __ mov(FP, SP);
 886   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 887 
 888   int frame_complete = __ pc() - start;
 889 
 890   OopMapSet* oop_maps = new OopMapSet();
 891   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 892   const int extra_args = is_static ? 2 : 1;
 893   int receiver_offset = -1;
 894   int fp_regs_in_arguments = 0;
 895 
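       // Shuffle the incoming Java arguments into the native (C) calling convention,
       // boxing oop arguments as handles (the address of a stack slot holding the oop,
       // or NULL for a null oop).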
 896   for (i = total_in_args; --i >= 0; ) {
 897     switch (in_sig_bt[i]) {
 898     case T_ARRAY:
 899     case T_OBJECT: {
 900       VMReg src = in_regs[i].first();
 901       VMReg dst = out_regs[i + extra_args].first();
 902       if (src->is_stack()) {
 903         assert(dst->is_stack(), "must be");
 904         assert(i != 0, "Incoming receiver is always in a register");
 905         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 906         __ cmp(Rtemp, 0);
 907         __ add(Rtemp, FP, reg2offset_in(src), ne);
 908         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 909         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 910         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
 911       } else {
 912         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
 913         __ str(src->as_Register(), Address(SP, offset));
 914         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
 915         if ((i == 0) && (!is_static)) {
 916           receiver_offset = offset;
 917         }
 918         oop_handle_offset += VMRegImpl::slots_per_word;
 919 
 920         if (dst->is_stack()) {
 921           __ movs(Rtemp, src->as_Register());
 922           __ add(Rtemp, SP, offset, ne);
 923           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 924         } else {
 925           __ movs(dst->as_Register(), src->as_Register());
 926           __ add(dst->as_Register(), SP, offset, ne);
 927         }
 928       }
 929     }
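         // fall through: the next case (T_VOID) only breaks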
 930 
 931     case T_VOID:
 932       break;
 933 
 934 
 935 #ifdef __SOFTFP__
 936     case T_DOUBLE:
 937 #endif
 938     case T_LONG: {
 939       VMReg src_1 = in_regs[i].first();
 940       VMReg src_2 = in_regs[i].second();
 941       VMReg dst_1 = out_regs[i + extra_args].first();
 942       VMReg dst_2 = out_regs[i + extra_args].second();
 943 #if (ALIGN_WIDE_ARGUMENTS == 0)
 944       // C convention can mix a register and a stack slot for a
 945       // 64-bits native argument.
 946 
 947       // Note: following code should work independently of whether
 948       // the Java calling convention follows C convention or whether
 949       // it aligns 64-bit values.
 950       if (dst_2->is_Register()) {
 951         if (src_1->as_Register() != dst_1->as_Register()) {
 952           assert(src_1->as_Register() != dst_2->as_Register() &&
 953                  src_2->as_Register() != dst_2->as_Register(), "must be");
 954           __ mov(dst_2->as_Register(), src_2->as_Register());
 955           __ mov(dst_1->as_Register(), src_1->as_Register());
 956         } else {
 957           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 958         }
 959       } else if (src_2->is_Register()) {
 960         if (dst_1->is_Register()) {
 961           // dst mixes a register and a stack slot
 962           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 963           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
 964           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 965           __ mov(dst_1->as_Register(), src_1->as_Register());
 966         } else {
 967           // registers to stack slots
 968           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 969           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 970           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 971         }
 972       } else if (src_1->is_Register()) {
 973         if (dst_1->is_Register()) {
 974           // src and dst must be R3 + stack slot
 975           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
 976           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
 977           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
 978         } else {
 979           // <R3,stack> -> <stack,stack>
 980           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
 981           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
 982           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 983           __ str(LR, Address(SP, reg2offset_out(dst_2)));
 984         }
 985       } else {
 986         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 987         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 988         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 989         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 990         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 991       }
 992 #else // ALIGN_WIDE_ARGUMENTS
 993       if (src_1->is_stack()) {
 994         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 995         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 996         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 997         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 998         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 999       } else if (dst_1->is_stack()) {
1000         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1001         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1002         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1003       } else if (src_1->as_Register() == dst_1->as_Register()) {
1004         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1005       } else {
1006         assert(src_1->as_Register() != dst_2->as_Register() &&
1007                src_2->as_Register() != dst_2->as_Register(), "must be");
1008         __ mov(dst_2->as_Register(), src_2->as_Register());
1009         __ mov(dst_1->as_Register(), src_1->as_Register());
1010       }
1011 #endif // ALIGN_WIDE_ARGUMENTS
1012       break;
1013     }
1014 
1015 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1016     case T_FLOAT: {
1017       VMReg src = in_regs[i].first();
1018       VMReg dst = out_regs[i + extra_args].first();
1019       if (src->is_stack()) {
1020         assert(dst->is_stack(), "must be");
1021         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1022         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1023       } else if (dst->is_stack()) {
1024         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1025       } else {
1026         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1027         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1028       }
1029       break;
1030     }
1031 
1032     case T_DOUBLE: {
1033       VMReg src_1 = in_regs[i].first();
1034       VMReg src_2 = in_regs[i].second();
1035       VMReg dst_1 = out_regs[i + extra_args].first();
1036       VMReg dst_2 = out_regs[i + extra_args].second();
1037       if (src_1->is_stack()) {
1038         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1039         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1040         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1041         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1042         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1043       } else if (dst_1->is_stack()) {
1044         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1045         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1046 #if (ALIGN_WIDE_ARGUMENTS == 0)
1047       } else if (dst_2->is_stack()) {
1048         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1049         // double register must go into R3 + one stack slot
1050         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1051         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1052 #endif
1053       } else {
1054         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1055         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1056       }
1057       break;
1058     }
1059 #endif // __SOFTFP__
1060 
1061 #ifdef __ABI_HARD__
1062     case T_FLOAT: {
1063       VMReg src = in_regs[i].first();
1064       VMReg dst = out_regs[i + extra_args].first();
1065       if (src->is_stack()) {
1066         if (dst->is_stack()) {
1067           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1068           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1069         } else {
1070           // C2 Java calling convention does not populate S14 and S15, therefore
1071           // those need to be loaded from stack here
1072           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1073           fp_regs_in_arguments++;
1074         }
1075       } else {
1076         assert(src->is_FloatRegister(), "must be");
1077         fp_regs_in_arguments++;
1078       }
1079       break;
1080     }
1081     case T_DOUBLE: {
1082       VMReg src_1 = in_regs[i].first();
1083       VMReg src_2 = in_regs[i].second();
1084       VMReg dst_1 = out_regs[i + extra_args].first();
1085       VMReg dst_2 = out_regs[i + extra_args].second();
1086       if (src_1->is_stack()) {
1087         if (dst_1->is_stack()) {
1088           assert(dst_2->is_stack(), "must be");
1089           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1090           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1091           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1092           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1093         } else {
1094           // C2 Java calling convention does not populate S14 and S15, therefore
1095           // those need to be loaded from stack here
1096           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1097           fp_regs_in_arguments += 2;
1098         }
1099       } else {
1100         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1101         fp_regs_in_arguments += 2;
1102       }
1103       break;
1104     }
1105 #endif // __ABI_HARD__
1106 
1107     default: {
1108       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1109       VMReg src = in_regs[i].first();
1110       VMReg dst = out_regs[i + extra_args].first();
1111       if (src->is_stack()) {
1112         assert(dst->is_stack(), "must be");
1113         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1114         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1115       } else if (dst->is_stack()) {
1116         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1117       } else {
1118         assert(src->is_Register() && dst->is_Register(), "must be");
1119         __ mov(dst->as_Register(), src->as_Register());
1120       }
1121     }
1122     }
1123   }
1124 
1125   // Get Klass mirror
1126   int klass_offset = -1;
1127   if (is_static) {
1128     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1129     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1130     __ add(c_rarg1, SP, klass_offset);
1131     __ str(Rtemp, Address(SP, klass_offset));
1132     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1133   }
1134 
1135   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1136   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1137   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1138   oop_maps->add_gc_map(pc_offset, map);
1139 
1140   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1141   __ membar(MacroAssembler::StoreStore, Rtemp);
1142 
1143   // RedefineClasses() tracing support for obsolete method entry
1144   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1145     __ save_caller_save_registers();
1146     __ mov(R0, Rthread);
1147     __ mov_metadata(R1, method());
1148     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1149     __ restore_caller_save_registers();
1150   }
1151 
1152   const Register sync_handle = R5;
1153   const Register sync_obj    = R6;
1154   const Register disp_hdr    = altFP_7_11;
1155   const Register tmp         = R8;
1156 
1157   Label slow_lock, slow_lock_biased, lock_done, fast_lock;
1158   if (method->is_synchronized()) {
1159     // The first argument is a handle to sync object (a class or an instance)
1160     __ ldr(sync_obj, Address(R1));
1161     // Remember the handle for the unlocking code
1162     __ mov(sync_handle, R1);
1163 
1164     __ resolve(IS_NOT_NULL, sync_obj);
1165 
1166     if(UseBiasedLocking) {
1167       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1168     }
1169 
1170     const Register mark = tmp;
1171     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1172     // That would be acceptable, as either the CAS or the slow-case path is taken in that case.
1173 
1174     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1175     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1176     __ tst(mark, markWord::unlocked_value);
1177     __ b(fast_lock, ne);
1178 
1179     // Check for recursive lock
1180     // See comments in InterpreterMacroAssembler::lock_object for
1181     // explanations on the fast recursive locking check.
1182     // Check independently the low bits and the distance to SP
1183     // -1- test low 2 bits
1184     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1185     // -2- test (hdr - SP) if the low two bits are 0
1186     __ sub(Rtemp, mark, SP, eq);
1187     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1188     // If still 'eq' then recursive locking OK: set displaced header to 0
1189     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1190     __ b(lock_done, eq);
1191     __ b(slow_lock);
1192 
1193     __ bind(fast_lock);
1194     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1195 
1196     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1197 
1198     __ bind(lock_done);
1199   }
1200 
1201   // Get JNIEnv*
1202   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1203 
1204   // Perform thread state transition
1205   __ mov(Rtemp, _thread_in_native);
1206   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1207 
1208   // Finally, call the native method
1209   __ call(method->native_function());
1210 
1211   // Set FPSCR/FPCR to a known state
1212   if (AlwaysRestoreFPU) {
1213     __ restore_default_fp_mode();
1214   }
1215 
1216   // Ensure a Boolean result is mapped to 0..1
1217   if (ret_type == T_BOOLEAN) {
1218     __ c2bool(R0);
1219   }
1220 
1221   // Do a safepoint check while thread is in transition state
1222   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1223   Label call_safepoint_runtime, return_to_java;
1224   __ mov(Rtemp, _thread_in_native_trans);
1225   __ ldr_literal(R2, safepoint_state);
1226   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1227 
1228   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1229   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1230 
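       // Branch to the safepoint runtime if a safepoint is in progress or a suspend
       // request is pending; the conditional compare folds both tests into one branch.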
1231   __ ldr_s32(R2, Address(R2));
1232   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1233   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1234   __ cond_cmp(R3, 0, eq);
1235   __ b(call_safepoint_runtime, ne);
1236   __ bind(return_to_java);
1237 
1238   // Perform thread state transition and reguard stack yellow pages if needed
1239   Label reguard, reguard_done;
1240   __ mov(Rtemp, _thread_in_Java);
1241   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1242   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1243 
1244   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1245   __ b(reguard, eq);
1246   __ bind(reguard_done);
1247 
1248   Label slow_unlock, unlock_done;
1249   if (method->is_synchronized()) {
1250     __ ldr(sync_obj, Address(sync_handle));
1251 
1252     __ resolve(IS_NOT_NULL, sync_obj);
1253 
1254     if(UseBiasedLocking) {
1255       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1256       // disp_hdr may not have been saved on entry with biased locking
1257       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1258     }
1259 
1260     // See C1_MacroAssembler::unlock_object() for more comments
1261     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1262     __ cbz(R2, unlock_done);
1263 
1264     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1265 
1266     __ bind(unlock_done);
1267   }
1268 
1269   // Set last java frame and handle block to zero
1270   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1271   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1272 
1273   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1274   if (CheckJNICalls) {
1275     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1276   }
1277 
1278   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1279   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1280     __ resolve_jobject(R0,      // value
1281                        Rtemp,   // tmp1
1282                        R1_tmp); // tmp2
1283   }
1284 
1285   // Any exception pending?
1286   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1287   __ mov(SP, FP);
1288 
1289   __ cmp(Rtemp, 0);
1290   // Pop the frame and return if no exception pending
1291   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1292   // Pop the frame and forward the exception. Rexception_pc contains return address.
1293   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1294   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1295   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1296 
1297   // Safepoint operation and/or pending suspend request is in progress.
1298   // Save the return values and call the runtime function by hand.
1299   __ bind(call_safepoint_runtime);
1300   push_result_registers(masm, ret_type);
1301   __ mov(R0, Rthread);
1302   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1303   pop_result_registers(masm, ret_type);
1304   __ b(return_to_java);
1305 
1306   __ bind_literal(safepoint_state);
1307 
1308   // Reguard stack pages. Save native results around a call to C runtime.
1309   __ bind(reguard);
1310   push_result_registers(masm, ret_type);
1311   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1312   pop_result_registers(masm, ret_type);
1313   __ b(reguard_done);
1314 
1315   if (method->is_synchronized()) {
1316     // Locking slow case
1317     if(UseBiasedLocking) {
1318       __ bind(slow_lock_biased);
1319       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1320     }
1321 
1322     __ bind(slow_lock);
1323 
1324     push_param_registers(masm, fp_regs_in_arguments);
1325 
1326     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1327     __ mov(R0, sync_obj);
1328     __ mov(R1, disp_hdr);
1329     __ mov(R2, Rthread);
1330     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1331 
1332     pop_param_registers(masm, fp_regs_in_arguments);
1333 
1334     __ b(lock_done);
1335 
1336     // Unlocking slow case
1337     __ bind(slow_unlock);
1338 
1339     push_result_registers(masm, ret_type);
1340 
1341     // Clear pending exception before reentering VM.
1342     // Can store the oop in register since it is a leaf call.
1343     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1344     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1345     Register zero = __ zero_register(Rtemp);
1346     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1347     __ mov(R0, sync_obj);
1348     __ mov(R1, disp_hdr);
1349     __ mov(R2, Rthread);
1350     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1351     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1352 
1353     pop_result_registers(masm, ret_type);
1354 
1355     __ b(unlock_done);
1356   }
1357 
1358   __ flush();
1359   return nmethod::new_native_nmethod(method,
1360                                      compile_id,
1361                                      masm->code(),
1362                                      vep_offset,
1363                                      frame_complete,
1364                                      stack_slots / VMRegImpl::slots_per_word,
1365                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1366                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1367                                      oop_maps);
1368 }
1369 
1370 // This function returns the adjustment (in number of words) to the size of a c2i adapter
1371 // activation, for use during deoptimization.
1372 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1373   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1374   return extra_locals_size;
1375 }
1376 
1377 
1378 uint SharedRuntime::out_preserve_stack_slots() {
1379   return 0;
1380 }
1381 
1382 
1383 //------------------------------generate_deopt_blob----------------------------
1384 void SharedRuntime::generate_deopt_blob() {
1385   ResourceMark rm;
1386   CodeBuffer buffer("deopt_blob", 1024, 1024);
1387   int frame_size_in_words;
1388   OopMapSet* oop_maps;
1389   int reexecute_offset;
1390   int exception_in_tls_offset;
1391   int exception_offset;
1392 
1393   MacroAssembler* masm = new MacroAssembler(&buffer);
1394   Label cont;
1395   const Register Rkind   = R9; // caller-saved
1396   const Register Rublock = R6;
1397   const Register Rsender = altFP_7_11;
1398   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1399 
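       // This blob has several entry points that all merge at 'cont' below:
       //  - the default entry for Unpack_deopt (fall-through from 'start'),
       //  - exception_offset and exception_in_tls_offset for Unpack_exception,
       //  - reexecute_offset for Unpack_reexecute.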
1400   address start = __ pc();
1401 
1402   oop_maps = new OopMapSet();
1403   // LR saved by caller (can be live in c2 method)
1404 
1405   // A deopt is a case where LR may be live in the c2 nmethod, so it is
1406   // not possible to call the deopt blob from the nmethod and pass the
1407   // address of the nmethod's deopt handler in LR. Instead, the caller
1408   // of the deopt blob pushes the current address itself, so the deopt
1409   // blob doesn't have to. This way LR is preserved: it still contains
1410   // the live value from the nmethod and is saved at the R14/R30_offset
1411   // slot here.
1412   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1413   __ mov(Rkind, Deoptimization::Unpack_deopt);
1414   __ b(cont);
1415 
1416   exception_offset = __ pc() - start;
1417 
1418   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1419   // exception_in_tls_offset entry point.
1420   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1421   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1422   // Force return value to NULL to avoid confusing the escape analysis
1423   // logic. Everything is dead here anyway.
1424   __ mov(R0, 0);
1425 
1426   exception_in_tls_offset = __ pc() - start;
1427 
1428   // Exception data is in JavaThread structure
1429   // Patch the return address of the current frame
1430   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1431   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1432   {
1433     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1434     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1435   }
1436   __ mov(Rkind, Deoptimization::Unpack_exception);
1437   __ b(cont);
1438 
1439   reexecute_offset = __ pc() - start;
1440 
1441   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1442   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1443 
1444   // Calculate UnrollBlock and save the result in Rublock
1445   __ bind(cont);
1446   __ mov(R0, Rthread);
1447   __ mov(R1, Rkind);
1448 
1449   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1450   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1451   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1452   if (pc_offset == -1) {
1453     pc_offset = __ offset();
1454   }
1455   oop_maps->add_gc_map(pc_offset, map);
1456   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1457 
1458   __ mov(Rublock, R0);
1459 
1460   // Reload Rkind from the UnrollBlock (might have changed)
1461   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1462   Label noException;
1463   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1464   __ b(noException, ne);
1465   // handle exception case
1466 #ifdef ASSERT
1467   // assert that exception_pc is zero in tls
1468   { Label L;
1469     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1470     __ cbz(Rexception_pc, L);
1471     __ stop("exception pc should be null");
1472     __ bind(L);
1473   }
1474 #endif
1475   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1476   __ verify_oop(Rexception_obj);
1477   {
1478     const Register Rzero = __ zero_register(Rtemp);
1479     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1480   }
1481 
1482   __ bind(noException);
1483 
1484   // This frame is going away.  Fetch return value, so we can move it to
1485   // a new frame.
1486   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1487   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1488 #ifndef __SOFTFP__
1489   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1490 #endif
1491   // pop frame
1492   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1493 
1494   // Set initial stack state before pushing interpreter frames
1495   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1496   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1497   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1498 
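       // Remove the deoptimized compiled frame: size_of_deoptimized_frame is
       // its size in bytes, as recorded in the UnrollBlock.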
1499   __ add(SP, SP, Rtemp);
1500 
1501 #ifdef ASSERT
1502   // Compilers generate code that bangs the stack by as much as the
1503   // interpreter would need. So this stack banging should never
1504   // trigger a fault. Verify that it does not on non-product builds.
1505   // See if there is enough stack to push deoptimized frames
1506   if (UseStackBanging) {
1507     // The compiled method that we are deoptimizing was popped from the stack.
1508     // If the stack bang results in a stack overflow, we don't return to the
1509     // method that is being deoptimized. The stack overflow exception is
1510     // propagated to the caller of the deoptimized method. Need to get the pc
1511     // from the caller in LR and restore FP.
1512     __ ldr(LR, Address(R2, 0));
1513     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1514     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1515     __ arm_stack_overflow_check(R8, Rtemp);
1516   }
1517 #endif
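       // R8 := number of interpreter frames to create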
1518   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1519 
1520   // Pick up the initial fp we should save
1521   // XXX Note: was ldr(FP, Address(FP));
1522 
1523   // The compiler no longer uses FP as a frame pointer for the
1524   // compiled code. It can be used by the allocator in C2 or to
1525   // remember the original SP for JSR292 call sites.
1526 
1527   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1528   // Deoptimization::fetch_unroll_info computes the right FP value and
1529   // stores it in Rublock.initial_info. This has been activated for ARM.
1530   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1531 
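       // caller_adjustment (in bytes) extends the caller's frame so the
       // interpreter frames being pushed have room for their locals;
       // Rsender records the sender SP before the adjustment.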
1532   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1533   __ mov(Rsender, SP);
1534   __ sub(SP, SP, Rtemp);
1535 
1536   // Push interpreter frames in a loop
1537   Label loop;
1538   __ bind(loop);
1539   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1540   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1541 
1542   __ raw_push(FP, LR);                                     // create new frame
1543   __ mov(FP, SP);
1544   __ sub(Rtemp, Rtemp, 2*wordSize);
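       // the frame size from the UnrollBlock includes the FP/LR pair that
       // raw_push just stored, so subtract those two slots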
1545 
1546   __ sub(SP, SP, Rtemp);
1547 
1548   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1549   __ mov(LR, 0);
1550   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1551 
1552   __ subs(R8, R8, 1);                               // decrement counter
1553   __ mov(Rsender, SP);
1554   __ b(loop, ne);
1555 
1556   // Re-push self-frame
1557   __ ldr(LR, Address(R2));
1558   __ raw_push(FP, LR);
1559   __ mov(FP, SP);
1560   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
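       // frame_size_in_words (from save_live_registers) already counts the
       // two FP/LR slots pushed just above, hence the '- 2'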
1561 
1562   // Restore frame locals after moving the frame
1563   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1564   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1565 
1566 #ifndef __SOFTFP__
1567   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1568 #endif // !__SOFTFP__
1569 
1570 #ifdef ASSERT
1571   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1572   { Label L;
1573     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1574     __ cmp_32(Rkind, Rtemp);
1575     __ b(L, eq);
1576     __ stop("Rkind was overwritten");
1577     __ bind(L);
1578   }
1579 #endif
1580 
1581   // Call unpack_frames with proper arguments
1582   __ mov(R0, Rthread);
1583   __ mov(R1, Rkind);
1584 
1585   pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1586   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1587   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1588   if (pc_offset == -1) {
1589     pc_offset = __ offset();
1590   }
1591   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1592   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1593 
1594   // Collect return values, pop self-frame and jump to interpreter
1595   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1596   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1597   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
1598   // float return value registers are controlled by __ABI_HARD__.
1599   // This matters for vfp-sflt builds.
1600 #ifndef __SOFTFP__
1601   // Interpreter hard float
1602 #ifdef __ABI_HARD__
1603   // Compiler float return value in FP registers
1604   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1605 #else
1606   // Compiler float return value in integer registers,
1607   // copy to D0 for the interpreter (low word <-- R0, high word <-- R1)
1608   __ fmdrr(D0_tos, R0, R1);
1609 #endif
1610 #endif // !__SOFTFP__
1611   __ mov(SP, FP);
1612 
1613   __ pop(RegisterSet(FP) | RegisterSet(PC));
1614 
1615   __ flush();
1616 
1617   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1618                                            reexecute_offset, frame_size_in_words);
1619   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1620 }
1621 
1622 #ifdef COMPILER2
1623 
1624 //------------------------------generate_uncommon_trap_blob--------------------
1625 // Ought to generate an ideal graph & compile, but here's some hand-written
1626 // ARM assembly instead.
1627 void SharedRuntime::generate_uncommon_trap_blob() {
1628   // allocate space for the code
1629   ResourceMark rm;
1630 
1631   // setup code generation tools
1632   int pad = VerifyThread ? 512 : 0;
1633 #ifdef _LP64
1634   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1635 #else
1636   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1637   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1638   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1639 #endif
1640   // bypassed when code generation is useless
1641   MacroAssembler* masm               = new MacroAssembler(&buffer);
1642   const Register Rublock = R6;
1643   const Register Rsender = altFP_7_11;
1644   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
1645 
1646   //
1647   // This is the entry point for all traps the compiler takes when it thinks
1648   // it cannot handle further execution of the compiled code. The frame is
1649   // deoptimized in these cases and converted into interpreter frames for
1650   // execution.
1651   // The steps taken by this blob are as follows:
1652   //   - push a fake "unpack_frame"
1653   //   - call the C routine Deoptimization::uncommon_trap (this function
1654   //     packs the current compiled frame into vframe arrays and returns
1655   //     information about the number and size of interpreter frames which
1656   //     are equivalent to the frame which is being deoptimized)
1657   //   - deallocate the "unpack_frame"
1658   //   - deallocate the deoptimization frame
1659   //   - in a loop using the information returned in the previous step
1660   //     push interpreter frames;
1661   //   - create a dummy "unpack_frame"
1662   //   - call the C routine: Deoptimization::unpack_frames (this function
1663   //     lays out values on the interpreter frame which was just created)
1664   //   - deallocate the dummy unpack_frame
1665   //   - return to the interpreter entry point
1666   //
1667   //  Refer to the following methods for more information:
1668   //   - Deoptimization::uncommon_trap
1669   //   - Deoptimization::unpack_frames
1670 
1671   // the unloaded class index is in R0 (first parameter to this blob)
1672 
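       // Push the fake "unpack_frame" (just FP/LR) mentioned in the steps
       // above; last_Java_frame lets the VM walk the stack during the call.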
1673   __ raw_push(FP, LR);
1674   __ set_last_Java_frame(SP, FP, false, Rtemp);
1675   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
1676   __ mov(R1, R0);
1677   __ mov(R0, Rthread);
1678   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
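       // R0 now holds the UnrollBlock* describing the interpreter frames to create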
1679   __ mov(Rublock, R0);
1680   __ reset_last_Java_frame(Rtemp);
1681   __ raw_pop(FP, LR);
1682 
1683 #ifdef ASSERT
1684   { Label L;
1685     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1686     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1687     __ b(L, eq);
1688     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1689     __ bind(L);
1690   }
1691 #endif
1692 
1693 
1694   // Set initial stack state before pushing interpreter frames
1695   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1696   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1697   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1698 
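       // Remove the deoptimized compiled frame (size given in bytes)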
1699   __ add(SP, SP, Rtemp);
1700 
1701   // See if there is enough stack to push deoptimized frames
1702 #ifdef ASSERT
1703   // Compilers generate code that bangs the stack by as much as the
1704   // interpreter would need. So this stack banging should never
1705   // trigger a fault. Verify that it does not on non-product builds.
1706   if (UseStackBanging) {
1707     // The compiled method that we are deoptimizing was popped from the stack.
1708     // If the stack bang results in a stack overflow, we don't return to the
1709     // method that is being deoptimized. The stack overflow exception is
1710     // propagated to the caller of the deoptimized method. Need to get the pc
1711     // from the caller in LR and restore FP.
1712     __ ldr(LR, Address(R2, 0));
1713     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1714     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1715     __ arm_stack_overflow_check(R8, Rtemp);
1716   }
1717 #endif
1718   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1719   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1720   __ mov(Rsender, SP);
1721   __ sub(SP, SP, Rtemp);
1722   //  __ ldr(FP, Address(FP));
1723   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1724 
1725   // Push interpreter frames in a loop
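       // (same frame layout as the corresponding loop in generate_deopt_blob above)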
1726   Label loop;
1727   __ bind(loop);
1728   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1729   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1730 
1731   __ raw_push(FP, LR);                                     // create new frame
1732   __ mov(FP, SP);
1733   __ sub(Rtemp, Rtemp, 2*wordSize);
1734 
1735   __ sub(SP, SP, Rtemp);
1736 
1737   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1738   __ mov(LR, 0);
1739   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1740   __ subs(R8, R8, 1);                               // decrement counter
1741   __ mov(Rsender, SP);
1742   __ b(loop, ne);
1743 
1744   // Re-push self-frame
1745   __ ldr(LR, Address(R2));
1746   __ raw_push(FP, LR);
1747   __ mov(FP, SP);
1748 
1749   // Call unpack_frames with proper arguments
1750   __ mov(R0, Rthread);
1751   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1752   __ set_last_Java_frame(SP, FP, true, Rtemp);
1753   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1754   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1755   __ reset_last_Java_frame(Rtemp);
1756 
1757   __ mov(SP, FP);
1758   __ pop(RegisterSet(FP) | RegisterSet(PC));
1759 
1760   masm->flush();
1761   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
1762 }
1763 
1764 #endif // COMPILER2
1765 
1766 //------------------------------generate_handler_blob------
1767 //
1768 // Generate a special Compile2Runtime blob that saves all registers,
1769 // sets up the oopmap, and calls safepoint code to stop the compiled code
1770 // for a safepoint.
1771 //
1772 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1773   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1774 
1775   ResourceMark rm;
1776   CodeBuffer buffer("handler_blob", 256, 256);
1777   int frame_size_words;
1778   OopMapSet* oop_maps;
1779 
1780   bool cause_return = (poll_type == POLL_AT_RETURN);
1781 
1782   MacroAssembler* masm = new MacroAssembler(&buffer);
1783   address start = __ pc();
1784   oop_maps = new OopMapSet();
1785 
1786   if (!cause_return) {
1787     __ sub(SP, SP, 4); // make room for LR which may still be live
1788                        // here if we are coming from a c2 method
1789   }
1790 
1791   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
1792   if (!cause_return) {
1793     // update saved PC with correct value
1794     // need 2 steps because LR can be live in c2 method
1795     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1796     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1797   }
1798 
1799   __ mov(R0, Rthread);
1800   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1801   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1802   __ call(call_ptr);
1803   if (pc_offset == -1) {
1804     pc_offset = __ offset();
1805   }
1806   oop_maps->add_gc_map(pc_offset, map);
1807   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1808 
1809   // Check for pending exception
1810   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1811   __ cmp(Rtemp, 0);
1812 
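       // eq here means no pending exception: restore registers and return to
       // the poll site; otherwise load the return address into Rexception_pc
       // and forward the exception.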
1813   if (!cause_return) {
1814     RegisterSaver::restore_live_registers(masm, false);
1815     __ pop(PC, eq);
1816     __ pop(Rexception_pc);
1817   } else {
1818     RegisterSaver::restore_live_registers(masm);
1819     __ bx(LR, eq);
1820     __ mov(Rexception_pc, LR);
1821   }
1822 
1823   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1824 
1825   __ flush();
1826 
1827   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1828 }
1829 
1830 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1831   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1832 
1833   ResourceMark rm;
1834   CodeBuffer buffer(name, 1000, 512);
1835   int frame_size_words;
1836   OopMapSet *oop_maps;
1837   int frame_complete;
1838 
1839   MacroAssembler* masm = new MacroAssembler(&buffer);
1840   Label pending_exception;
1841 
1842   int start = __ offset();
1843 
1844   oop_maps = new OopMapSet();
1845   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
1846 
1847   frame_complete = __ offset();
1848 
1849   __ mov(R0, Rthread);
1850 
1851   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1852   assert(start == 0, "warning: start differs from code_begin");
1853   __ call(destination);
1854   if (pc_offset == -1) {
1855     pc_offset = __ offset();
1856   }
1857   oop_maps->add_gc_map(pc_offset, map);
1858   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1859 
1860   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
1861   __ cbnz(R1, pending_exception);
1862 
1863   // Overwrite saved register values
1864 
1865   // Place metadata result of VM call into Rmethod
1866   __ get_vm_result_2(R1, Rtemp);
1867   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
1868 
1869   // Place target address (VM call result) into Rtemp
1870   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
1871 
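       // restore_live_registers reloads the slots written above, leaving the
       // Method* in Rmethod and the resolved entry point in Rtemp for the jump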
1872   RegisterSaver::restore_live_registers(masm);
1873   __ jump(Rtemp);
1874 
1875   __ bind(pending_exception);
1876 
1877   RegisterSaver::restore_live_registers(masm);
1878   const Register Rzero = __ zero_register(Rtemp);
1879   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
1880   __ mov(Rexception_pc, LR);
1881   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1882 
1883   __ flush();
1884 
1885   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1886 }