1 /*
   2  * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "runtime/vframeArray.hpp"
  37 #include "utilities/align.hpp"
  38 #include "vmreg_arm.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #endif
  45 
  46 #define __ masm->
  47 
  48 class RegisterSaver {
  49 public:
  50 
  51   // Special registers:
  52   //              32-bit ARM     64-bit ARM
  53   //  Rthread:       R10            R28
  54   //  LR:            R14            R30
  55 
  56   // Rthread is callee saved in the C ABI and never changed by compiled code:
  57   // no need to save it.
  58 
  // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A C2 method uses LR as a standard register so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it's live in the method we are coming from.
  64 
  65 #ifdef AARCH64
  66 
  67   //
  // On AArch64 the register save area has the following layout:
  69   //
  70   // |---------------------|
  71   // | return address (LR) |
  72   // | FP                  |
  73   // |---------------------|
  74   // | V31                 |
  75   // | ...                 |
  76   // | V0                  |
  77   // |---------------------|
  78   // | padding             |
  79   // | R30 (LR live value) |
  80   // |---------------------|
  81   // | R27                 |
  82   // | ...                 |
  83   // | R0                  |
  84   // |---------------------| <-- SP
  85   //
  86 
  87   enum RegisterLayout {
  88     number_of_saved_gprs = 28,
  89     number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
  90     words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
  91 
  92     R0_offset  = 0,
  93     R30_offset = R0_offset + number_of_saved_gprs,
  94     D0_offset  = R30_offset + 2,
  95     FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
  96     LR_offset  = FP_offset + 1,
  97 
  98     reg_save_size = LR_offset + 1,
  99   };
 100 
 101   static const int Rmethod_offset;
 102   static const int Rtemp_offset;
 103 
 104 #else
 105 
 106   enum RegisterLayout {
 107     fpu_save_size = FloatRegisterImpl::number_of_registers,
 108 #ifndef __SOFTFP__
 109     D0_offset = 0,
 110 #endif
 111     R0_offset = fpu_save_size,
 112     R1_offset,
 113     R2_offset,
 114     R3_offset,
 115     R4_offset,
 116     R5_offset,
 117     R6_offset,
 118 #if (FP_REG_NUM != 7)
 119     // if not saved as FP
 120     R7_offset,
 121 #endif
 122     R8_offset,
 123     R9_offset,
 124 #if (FP_REG_NUM != 11)
 125     // if not saved as FP
 126     R11_offset,
 127 #endif
 128     R12_offset,
 129     R14_offset,
 130     FP_offset,
 131     LR_offset,
 132     reg_save_size,
 133 
 134     Rmethod_offset = R9_offset,
 135     Rtemp_offset = R12_offset,
 136   };
 137 
 138   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  // (altFP_7_11 is whichever of R7 and R11 is not FP)
 140 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
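  // A rough sketch (not in the original sources) of the resulting 32-bit save
  // area, assuming FP_REG_NUM == 11 (so altFP_7_11 is R7) and a build that
  // reserves 64 words of FP save space:
  //
  // |---------------------|
  // | LR                  |
  // | FP                  |
  // |---------------------|
  // | R14                 |
  // | R12                 |
  // | R9                  |
  // | R8                  |
  // | R7 (altFP_7_11)     |
  // | R6 ... R0           |
  // |---------------------|
  // | D31 ... D16         |
  // | (or padding without |
  // |  VFPv3-D32)         |
  // | D15 ... D0          |
  // |---------------------| <-- SP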
 141 
 142 #endif // AARCH64
 143 
  //  When LR may be live in the nmethod from which we are coming,
  //  lr_saved is true: the caller has saved the return address before
  //  the call to save_live_registers and LR still contains the
  //  live value.
 148 
 149   static OopMap* save_live_registers(MacroAssembler* masm,
 150                                      int* total_frame_words,
 151                                      bool lr_saved = false);
 152   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 153 
 154 };
 155 
 156 
 157 #ifdef AARCH64
 158 const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
 159 const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
 160 #endif // AARCH64
 161 
 162 
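// Illustrative use from a runtime stub (a sketch, not a verbatim caller):
//
//   int frame_size_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
//   // ... set up the last Java frame, call into the VM, and record `map`
//   // ... in the stub's OopMapSet at the call's PC offset ...
//   RegisterSaver::restore_live_registers(masm);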
 163 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 164                                            int* total_frame_words,
 165                                            bool lr_saved) {
 166   *total_frame_words = reg_save_size;
 167 
 168   OopMapSet *oop_maps = new OopMapSet();
 169   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 170 
 171 #ifdef AARCH64
 172   assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
 173 
 174   if (lr_saved) {
    // LR was stashed here, so that the jump could use it as a scratch register
 176     __ ldr(LR, Address(SP, 0));
 177     // There are two words on the stack top:
 178     //  [SP + 0]: placeholder for FP
 179     //  [SP + wordSize]: saved return address
 180     __ str(FP, Address(SP, 0));
 181   } else {
 182     __ raw_push(FP, LR);
 183   }
 184 
 185   __ sub(SP, SP, (reg_save_size - 2) * wordSize);
 186 
 187   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 188     int offset = R0_offset + i;
 189     __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
 190     map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
 191     map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
 192   }
 193 
 194   __ str(R30, Address(SP, R30_offset * wordSize));
 195   map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
 196 
 197   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 198     int offset1 = D0_offset + i * words_per_fpr;
 199     int offset2 = offset1 + words_per_fpr;
 200     Address base(SP, offset1 * wordSize);
 201     if (words_per_fpr == 2) {
 202       // pair of "wide" quad vector registers
 203       __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 204     } else {
 205       // pair of double vector registers
 206       __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 207     }
 208     map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
 209     map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
 210   }
 211 #else
 212   if (lr_saved) {
 213     __ push(RegisterSet(FP));
 214   } else {
 215     __ push(RegisterSet(FP) | RegisterSet(LR));
 216   }
 217   __ push(SAVED_BASE_REGS);
 218   if (HaveVFP) {
 219     if (VM_Version::has_vfp3_32()) {
 220       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 221     } else {
 222       if (FloatRegisterImpl::number_of_registers > 32) {
 223         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 224         __ sub(SP, SP, 32 * wordSize);
 225       }
 226     }
 227     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 228   } else {
 229     __ sub(SP, SP, fpu_save_size * wordSize);
 230   }
 231 
 232   int i;
 233   int j=0;
 234   for (i = R0_offset; i <= R9_offset; i++) {
 235     if (j == FP_REG_NUM) {
 236       // skip the FP register, managed below.
 237       j++;
 238     }
 239     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 240     j++;
 241   }
 242   assert(j == R10->encoding(), "must be");
 243 #if (FP_REG_NUM != 11)
 244   // add R11, if not managed as FP
 245   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 246 #endif
 247   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 248   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 249   if (HaveVFP) {
 250     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 251       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 252       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 253     }
 254   }
 255 #endif // AARCH64
 256 
 257   return map;
 258 }
 259 
 260 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 261 #ifdef AARCH64
 262   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 263     __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
 264   }
 265 
 266   __ ldr(R30, Address(SP, R30_offset * wordSize));
 267 
 268   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 269     Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
 270     if (words_per_fpr == 2) {
 271       // pair of "wide" quad vector registers
 272       __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 273     } else {
 274       // pair of double vector registers
 275       __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 276     }
 277   }
 278 
 279   __ add(SP, SP, (reg_save_size - 2) * wordSize);
 280 
 281   if (restore_lr) {
 282     __ raw_pop(FP, LR);
 283   } else {
 284     __ ldr(FP, Address(SP, 0));
 285   }
 286 #else
 287   if (HaveVFP) {
 288     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 289     if (VM_Version::has_vfp3_32()) {
 290       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 291     } else {
 292       if (FloatRegisterImpl::number_of_registers > 32) {
 293         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 294         __ add(SP, SP, 32 * wordSize);
 295       }
 296     }
 297   } else {
 298     __ add(SP, SP, fpu_save_size * wordSize);
 299   }
 300   __ pop(SAVED_BASE_REGS);
 301   if (restore_lr) {
 302     __ pop(RegisterSet(FP) | RegisterSet(LR));
 303   } else {
 304     __ pop(RegisterSet(FP));
 305   }
 306 #endif // AARCH64
 307 }
 308 
 309 #ifdef AARCH64
 310 
 311 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 312   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 313     __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
 314   } else {
 315     __ raw_push(R0, ZR);
 316   }
 317 }
 318 
 319 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 320   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 321     __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
 322   } else {
 323     __ raw_pop(R0, ZR);
 324   }
 325 }
 326 
 327 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 328   __ raw_push(R0, R1);
 329   __ raw_push(R2, R3);
 330   __ raw_push(R4, R5);
 331   __ raw_push(R6, R7);
 332 
 333   assert(FPR_PARAMS == 8, "adjust this code");
 334   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 335 
 336   if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
 337   if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
 338   if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
 339   if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
 340 }
 341 
 342 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 343   assert(FPR_PARAMS == 8, "adjust this code");
 344   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 345 
 346   if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
 347   if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
 348   if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
 349   if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
 350 
 351   __ raw_pop(R6, R7);
 352   __ raw_pop(R4, R5);
 353   __ raw_pop(R2, R3);
 354   __ raw_pop(R0, R1);
 355 }
 356 
 357 #else // AARCH64
 358 
 359 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 360 #ifdef __ABI_HARD__
 361   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 362     __ sub(SP, SP, 8);
 363     __ fstd(D0, Address(SP));
 364     return;
 365   }
 366 #endif // __ABI_HARD__
 367   __ raw_push(R0, R1);
 368 }
 369 
 370 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 371 #ifdef __ABI_HARD__
 372   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 373     __ fldd(D0, Address(SP));
 374     __ add(SP, SP, 8);
 375     return;
 376   }
 377 #endif // __ABI_HARD__
 378   __ raw_pop(R0, R1);
 379 }
 380 
 381 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 382   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 383   __ push(RegisterSet(R0, R3));
 384 
 385 #ifdef __ABI_HARD__
 386   // preserve arguments
  // Likely not needed, as the locking code probably won't modify the volatile FP registers,
  // but there is no way to guarantee that.
 389   if (fp_regs_in_arguments) {
 390     // convert fp_regs_in_arguments to a number of double registers
 391     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 392     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 393   }
#endif // __ABI_HARD__
 395 }
 396 
 397 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 398 #ifdef __ABI_HARD__
 399   if (fp_regs_in_arguments) {
 400     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 401     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 402   }
 403 #endif // __ABI_HARD__
 404 
 405   __ pop(RegisterSet(R0, R3));
 406 }
 407 
 408 #endif // AARCH64
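// Note on the push/pop helpers above: values are pushed in pairs (using ZR or
// an extra register as filler) so SP keeps the 16-byte alignment required on
// AArch64, and an even number of words is pushed on 32-bit ARM to preserve
// 8-byte stack alignment across the runtime call.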
 409 
 410 
// Is the vector's size (in bytes) bigger than the size saved by default?
// All vector registers are saved by default on ARM.
 413 bool SharedRuntime::is_wide_vector(int size) {
 414   return false;
 415 }
 416 
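// The trampoline below is an indirect jump through an inlined literal that
// holds the destination address; trampoline_size() is assumed to be large
// enough to cover the load, the jump and the pointer-sized literal emitted by
// bind_literal() on both 32-bit and 64-bit ARM.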
 417 size_t SharedRuntime::trampoline_size() {
 418   return 16;
 419 }
 420 
 421 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 422   InlinedAddress dest(destination);
 423   __ indirect_jump(dest, Rtemp);
 424   __ bind_literal(dest);
 425 }
 426 
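// Illustrative example (not from the original sources): for a native signature
// (jint, jlong, jfloat, jdouble) the code below assigns
//   AArch64:            R0, R1,    V0, V1
//   32-bit ARM hard-FP: R0, R2:R3, S0, D1
// with the long padded to an aligned register pair when ALIGN_WIDE_ARGUMENTS
// is set, and floating-point values passed in core registers or stack slots
// when __ABI_HARD__ is not defined.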
 427 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 428                                         VMRegPair *regs,
 429                                         VMRegPair *regs2,
 430                                         int total_args_passed) {
 431   assert(regs2 == NULL, "not needed on arm");
 432 #ifdef AARCH64
 433   int slot = 0; // counted in 32-bit VMReg slots
 434   int reg = 0;
 435   int fp_reg = 0;
 436   for (int i = 0; i < total_args_passed; i++) {
 437     switch (sig_bt[i]) {
 438     case T_SHORT:
 439     case T_CHAR:
 440     case T_BYTE:
 441     case T_BOOLEAN:
 442     case T_INT:
 443       if (reg < GPR_PARAMS) {
 444         Register r = as_Register(reg);
 445         regs[i].set1(r->as_VMReg());
 446         reg++;
 447       } else {
 448         regs[i].set1(VMRegImpl::stack2reg(slot));
 449         slot+=2;
 450       }
 451       break;
 452     case T_LONG:
 453       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 454       // fall through
 455     case T_ARRAY:
 456     case T_OBJECT:
 457     case T_ADDRESS:
 458       if (reg < GPR_PARAMS) {
 459         Register r = as_Register(reg);
 460         regs[i].set2(r->as_VMReg());
 461         reg++;
 462       } else {
 463         regs[i].set2(VMRegImpl::stack2reg(slot));
 464         slot+=2;
 465       }
 466       break;
 467     case T_FLOAT:
 468       if (fp_reg < FPR_PARAMS) {
 469         FloatRegister r = as_FloatRegister(fp_reg);
 470         regs[i].set1(r->as_VMReg());
 471         fp_reg++;
 472       } else {
 473         regs[i].set1(VMRegImpl::stack2reg(slot));
 474         slot+=2;
 475       }
 476       break;
 477     case T_DOUBLE:
 478       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 479       if (fp_reg < FPR_PARAMS) {
 480         FloatRegister r = as_FloatRegister(fp_reg);
 481         regs[i].set2(r->as_VMReg());
 482         fp_reg++;
 483       } else {
 484         regs[i].set2(VMRegImpl::stack2reg(slot));
 485         slot+=2;
 486       }
 487       break;
 488     case T_VOID:
 489       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 490       regs[i].set_bad();
 491       break;
 492     default:
 493       ShouldNotReachHere();
 494     }
 495   }
 496   return slot;
 497 
 498 #else // AARCH64
 499 
 500   int slot = 0;
 501   int ireg = 0;
 502 #ifdef __ABI_HARD__
 503   int fp_slot = 0;
 504   int single_fpr_slot = 0;
 505 #endif // __ABI_HARD__
 506   for (int i = 0; i < total_args_passed; i++) {
 507     switch (sig_bt[i]) {
 508     case T_SHORT:
 509     case T_CHAR:
 510     case T_BYTE:
 511     case T_BOOLEAN:
 512     case T_INT:
 513     case T_ARRAY:
 514     case T_OBJECT:
 515     case T_ADDRESS:
 516 #ifndef __ABI_HARD__
 517     case T_FLOAT:
 518 #endif // !__ABI_HARD__
 519       if (ireg < 4) {
 520         Register r = as_Register(ireg);
 521         regs[i].set1(r->as_VMReg());
 522         ireg++;
 523       } else {
 524         regs[i].set1(VMRegImpl::stack2reg(slot));
 525         slot++;
 526       }
 527       break;
 528     case T_LONG:
 529 #ifndef __ABI_HARD__
 530     case T_DOUBLE:
 531 #endif // !__ABI_HARD__
 532       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 533       if (ireg <= 2) {
 534 #if (ALIGN_WIDE_ARGUMENTS == 1)
 535         if(ireg & 1) ireg++;  // Aligned location required
 536 #endif
 537         Register r1 = as_Register(ireg);
 538         Register r2 = as_Register(ireg + 1);
 539         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 540         ireg += 2;
 541 #if (ALIGN_WIDE_ARGUMENTS == 0)
 542       } else if (ireg == 3) {
 543         // uses R3 + one stack slot
 544         Register r = as_Register(ireg);
 545         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 546         ireg += 1;
 547         slot += 1;
 548 #endif
 549       } else {
 550         if (slot & 1) slot++; // Aligned location required
 551         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 552         slot += 2;
 553         ireg = 4;
 554       }
 555       break;
 556     case T_VOID:
 557       regs[i].set_bad();
 558       break;
 559 #ifdef __ABI_HARD__
 560     case T_FLOAT:
 561       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 562         if ((single_fpr_slot & 1) == 0) {
 563           single_fpr_slot = fp_slot;
 564           fp_slot += 2;
 565         }
 566         FloatRegister r = as_FloatRegister(single_fpr_slot);
 567         single_fpr_slot++;
 568         regs[i].set1(r->as_VMReg());
 569       } else {
 570         regs[i].set1(VMRegImpl::stack2reg(slot));
 571         slot++;
 572       }
 573       break;
 574     case T_DOUBLE:
 575       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 576       if (fp_slot <= 14) {
 577         FloatRegister r1 = as_FloatRegister(fp_slot);
 578         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 579         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 580         fp_slot += 2;
 581       } else {
 582         if(slot & 1) slot++;
 583         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 584         slot += 2;
 585         single_fpr_slot = 16;
 586       }
 587       break;
 588 #endif // __ABI_HARD__
 589     default:
 590       ShouldNotReachHere();
 591     }
 592   }
 593   return slot;
 594 #endif // AARCH64
 595 }
 596 
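// The Java calling convention on 32-bit ARM mirrors the hard-float C
// convention, except that S14/S15 are kept free for C2's mem-mem moves (when
// C2 is present) and wide arguments are always passed in aligned pairs; the
// returned slot count is rounded up to an even number. On AArch64, and under
// __SOFTFP__, the C convention is reused directly.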
 597 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 598                                            VMRegPair *regs,
 599                                            int total_args_passed,
 600                                            int is_outgoing) {
 601 #ifdef AARCH64
 602   // C calling convention on AArch64 is good enough.
 603   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 604 #else
 605 #ifdef __SOFTFP__
 606   // soft float is the same as the C calling convention.
 607   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 608 #endif // __SOFTFP__
 609   (void) is_outgoing;
 610   int slot = 0;
 611   int ireg = 0;
 612   int freg = 0;
 613   int single_fpr = 0;
 614 
 615   for (int i = 0; i < total_args_passed; i++) {
 616     switch (sig_bt[i]) {
 617     case T_SHORT:
 618     case T_CHAR:
 619     case T_BYTE:
 620     case T_BOOLEAN:
 621     case T_INT:
 622     case T_ARRAY:
 623     case T_OBJECT:
 624     case T_ADDRESS:
 625       if (ireg < 4) {
 626         Register r = as_Register(ireg++);
 627         regs[i].set1(r->as_VMReg());
 628       } else {
 629         regs[i].set1(VMRegImpl::stack2reg(slot++));
 630       }
 631       break;
 632     case T_FLOAT:
 633       // C2 utilizes S14/S15 for mem-mem moves
 634       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 635         if ((single_fpr & 1) == 0) {
 636           single_fpr = freg;
 637           freg += 2;
 638         }
 639         FloatRegister r = as_FloatRegister(single_fpr++);
 640         regs[i].set1(r->as_VMReg());
 641       } else {
 642         regs[i].set1(VMRegImpl::stack2reg(slot++));
 643       }
 644       break;
 645     case T_DOUBLE:
 646       // C2 utilizes S14/S15 for mem-mem moves
 647       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 648         FloatRegister r1 = as_FloatRegister(freg);
 649         FloatRegister r2 = as_FloatRegister(freg + 1);
 650         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 651         freg += 2;
 652       } else {
        // Internally, keep the aligned calling convention,
        // ignoring ALIGN_WIDE_ARGUMENTS
 655         if (slot & 1) slot++;
 656         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 657         slot += 2;
 658         single_fpr = 16;
 659       }
 660       break;
 661     case T_LONG:
      // Internally, keep the aligned calling convention,
      // ignoring ALIGN_WIDE_ARGUMENTS
 664       if (ireg <= 2) {
 665         if (ireg & 1) ireg++;
 666         Register r1 = as_Register(ireg);
 667         Register r2 = as_Register(ireg + 1);
 668         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 669         ireg += 2;
 670       } else {
 671         if (slot & 1) slot++;
 672         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 673         slot += 2;
 674         ireg = 4;
 675       }
 676       break;
 677     case T_VOID:
 678       regs[i].set_bad();
 679       break;
 680     default:
 681       ShouldNotReachHere();
 682     }
 683   }
 684 
 685   if (slot & 1) slot++;
 686   return slot;
 687 #endif // AARCH64
 688 }
 689 
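// If the callee has compiled code (Method::code() != NULL), call
// SharedRuntime::fixup_callers_callsite(Rmethod, LR) so that the caller's
// call site can be patched to call the compiled code directly. All argument
// registers are preserved around the runtime call.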
 690 static void patch_callers_callsite(MacroAssembler *masm) {
 691   Label skip;
 692 
 693   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 694   __ cbz(Rtemp, skip);
 695 
 696 #ifdef AARCH64
 697   push_param_registers(masm, FPR_PARAMS);
 698   __ raw_push(LR, ZR);
 699 #else
  // Push an even number of registers for stack alignment.
  // R9 is chosen because it has to be saved anyway on some platforms.
 702   __ push(RegisterSet(R0, R3) | R9 | LR);
 703 #endif // AARCH64
 704 
 705   __ mov(R0, Rmethod);
 706   __ mov(R1, LR);
 707   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 708 
 709 #ifdef AARCH64
 710   __ raw_pop(LR, ZR);
 711   pop_param_registers(masm, FPR_PARAMS);
 712 #else
 713   __ pop(RegisterSet(R0, R3) | R9 | LR);
 714 #endif // AARCH64
 715 
 716   __ bind(skip);
 717 }
 718 
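// Interpreter-to-compiled adapter: arguments arrive on the interpreter's
// expression stack and are shuffled into the registers and outgoing stack
// slots described by `regs`, after which control jumps to the method's
// from_compiled entry. The callee is also stashed in JavaThread::callee_target
// so that SharedRuntime::handle_wrong_method can recover it (see the comment
// below).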
 719 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 720                                     int total_args_passed, int comp_args_on_stack,
 721                                     const BasicType *sig_bt, const VMRegPair *regs) {
  // TODO: ARM - maybe we can use ldm to load the arguments
 723   const Register tmp = Rtemp; // avoid erasing R5_mh
 724 
  // The next assert may not be needed, but it is safer. Extra analysis is required
  // if there are not enough free registers and we need to use R5 here.
 727   assert_different_registers(tmp, R5_mh);
 728 
  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race through here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately, if
  // we try to find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
 738   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 739   __ str(Rmethod, callee_target_addr);
 740 
 741 #ifdef AARCH64
 742 
 743   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
 744   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
 745 
 746   if (comp_args_on_stack) {
 747     __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
 748   }
 749 
 750   for (int i = 0; i < total_args_passed; i++) {
 751     if (sig_bt[i] == T_VOID) {
 752       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 753       continue;
 754     }
 755     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 756 
 757     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 758     Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 759 
 760     VMReg r = regs[i].first();
 761     bool full_word = regs[i].second()->is_valid();
 762 
 763     if (r->is_stack()) {
 764       if (full_word) {
 765         __ ldr(tmp, source_addr);
 766         __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 767       } else {
 768         __ ldr_w(tmp, source_addr);
 769         __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 770       }
 771     } else if (r->is_Register()) {
 772       if (full_word) {
 773         __ ldr(r->as_Register(), source_addr);
 774       } else {
 775         __ ldr_w(r->as_Register(), source_addr);
 776       }
 777     } else if (r->is_FloatRegister()) {
 778       if (sig_bt[i] == T_DOUBLE) {
 779         __ ldr_d(r->as_FloatRegister(), source_addr);
 780       } else {
 781         __ ldr_s(r->as_FloatRegister(), source_addr);
 782       }
 783     } else {
 784       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 785     }
 786   }
 787 
 788   __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
 789   __ br(tmp);
 790 
 791 #else
 792 
 793   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 794 
 795   const Register initial_sp = Rmethod; // temporarily scratched
 796 
  // The old code modified R4, but that looks unsafe (particularly with JSR292)
 798   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 799 
 800   __ mov(initial_sp, SP);
 801 
 802   if (comp_args_on_stack) {
 803     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 804   }
 805   __ bic(SP, SP, StackAlignmentInBytes - 1);
 806 
 807   for (int i = 0; i < total_args_passed; i++) {
 808     if (sig_bt[i] == T_VOID) {
 809       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 810       continue;
 811     }
 812     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 813     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 814 
 815     VMReg r_1 = regs[i].first();
 816     VMReg r_2 = regs[i].second();
 817     if (r_1->is_stack()) {
 818       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 819       if (!r_2->is_valid()) {
 820         __ ldr(tmp, Address(initial_sp, arg_offset));
 821         __ str(tmp, Address(SP, stack_offset));
 822       } else {
 823         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 824         __ str(tmp, Address(SP, stack_offset));
 825         __ ldr(tmp, Address(initial_sp, arg_offset));
 826         __ str(tmp, Address(SP, stack_offset + wordSize));
 827       }
 828     } else if (r_1->is_Register()) {
 829       if (!r_2->is_valid()) {
 830         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 831       } else {
 832         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 833         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 834       }
 835     } else if (r_1->is_FloatRegister()) {
 836 #ifdef __SOFTFP__
 837       ShouldNotReachHere();
 838 #endif // __SOFTFP__
 839       if (!r_2->is_valid()) {
 840         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 841       } else {
 842         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 843       }
 844     } else {
 845       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 846     }
 847   }
 848 
  // restore Rmethod (it was scratched to hold initial_sp)
 850   __ ldr(Rmethod, callee_target_addr);
 851   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 852 
 853 #endif // AARCH64
 854 }
 855 
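// Compiled-to-interpreter adapter: after optionally patching the caller's
// call site, the arguments are copied from the compiled-convention registers
// and stack slots into interpreter expression-stack slots carved out below
// the incoming SP (saved in Rsender_sp), and control jumps to the method's
// interpreter entry point.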
 856 static void gen_c2i_adapter(MacroAssembler *masm,
 857                             int total_args_passed,  int comp_args_on_stack,
 858                             const BasicType *sig_bt, const VMRegPair *regs,
 859                             Label& skip_fixup) {
  // TODO: ARM - maybe we can use stm to deoptimize the arguments
 861   const Register tmp = Rtemp;
 862 
 863   patch_callers_callsite(masm);
 864   __ bind(skip_fixup);
 865 
 866   __ mov(Rsender_sp, SP); // not yet saved
 867 
 868 #ifdef AARCH64
 869 
 870   int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
 871   if (extraspace) {
 872     __ sub(SP, SP, extraspace);
 873   }
 874 
 875   for (int i = 0; i < total_args_passed; i++) {
 876     if (sig_bt[i] == T_VOID) {
 877       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 878       continue;
 879     }
 880 
 881     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 882     Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 883 
 884     VMReg r = regs[i].first();
 885     bool full_word = regs[i].second()->is_valid();
 886 
 887     if (r->is_stack()) {
 888       if (full_word) {
 889         __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 890         __ str(tmp, dest_addr);
 891       } else {
 892         __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 893         __ str_w(tmp, dest_addr);
 894       }
 895     } else if (r->is_Register()) {
 896       if (full_word) {
 897         __ str(r->as_Register(), dest_addr);
 898       } else {
 899         __ str_w(r->as_Register(), dest_addr);
 900       }
 901     } else if (r->is_FloatRegister()) {
 902       if (sig_bt[i] == T_DOUBLE) {
 903         __ str_d(r->as_FloatRegister(), dest_addr);
 904       } else {
 905         __ str_s(r->as_FloatRegister(), dest_addr);
 906       }
 907     } else {
 908       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 909     }
 910   }
 911 
 912   __ mov(Rparams, SP);
 913 
 914   __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
 915   __ br(tmp);
 916 
 917 #else
 918 
 919   int extraspace = total_args_passed * Interpreter::stackElementSize;
 920   if (extraspace) {
 921     __ sub_slow(SP, SP, extraspace);
 922   }
 923 
 924   for (int i = 0; i < total_args_passed; i++) {
 925     if (sig_bt[i] == T_VOID) {
 926       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 927       continue;
 928     }
 929     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 930 
 931     VMReg r_1 = regs[i].first();
 932     VMReg r_2 = regs[i].second();
 933     if (r_1->is_stack()) {
 934       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 935       if (!r_2->is_valid()) {
 936         __ ldr(tmp, Address(SP, arg_offset));
 937         __ str(tmp, Address(SP, stack_offset));
 938       } else {
 939         __ ldr(tmp, Address(SP, arg_offset));
 940         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 941         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 942         __ str(tmp, Address(SP, stack_offset));
 943       }
 944     } else if (r_1->is_Register()) {
 945       if (!r_2->is_valid()) {
 946         __ str(r_1->as_Register(), Address(SP, stack_offset));
 947       } else {
 948         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 949         __ str(r_2->as_Register(), Address(SP, stack_offset));
 950       }
 951     } else if (r_1->is_FloatRegister()) {
 952 #ifdef __SOFTFP__
 953       ShouldNotReachHere();
 954 #endif // __SOFTFP__
 955       if (!r_2->is_valid()) {
 956         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 957       } else {
 958         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 959       }
 960     } else {
 961       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 962     }
 963   }
 964 
 965   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 966 
 967 #endif // AARCH64
 968 }
 969 
 970 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 971                                                             int total_args_passed,
 972                                                             int comp_args_on_stack,
 973                                                             const BasicType *sig_bt,
 974                                                             const VMRegPair *regs,
 975                                                             AdapterFingerPrint* fingerprint) {
 976   address i2c_entry = __ pc();
 977   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 978 
 979   address c2i_unverified_entry = __ pc();
 980   Label skip_fixup;
 981   const Register receiver       = R0;
 982   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 983   const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
 984 
 985   __ load_klass(receiver_klass, receiver);
 986   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 987   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset()));
 988   __ cmp(receiver_klass, holder_klass);
 989 
 990 #ifdef AARCH64
 991   Label ic_miss;
 992   __ b(ic_miss, ne);
 993   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 994   __ cbz(Rtemp, skip_fixup);
 995   __ bind(ic_miss);
 996   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 997 #else
 998   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 999   __ cmp(Rtemp, 0, eq);
1000   __ b(skip_fixup, eq);
1001   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
1002 #endif // AARCH64
1003 
1004   address c2i_entry = __ pc();
1005   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1006 
1007   __ flush();
1008   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1009 }
1010 
1011 
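// Helpers translating VMReg stack slots into byte offsets: reg2offset_in() is
// relative to the caller's FP (skipping the saved FP/LR pair), reg2offset_out()
// is relative to the SP of the frame being built. For example, incoming slot 2
// maps to FP + 2 * VMRegImpl::stack_slot_size + 2 * wordSize.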
1012 static int reg2offset_in(VMReg r) {
1013   // Account for saved FP and LR
1014   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1015 }
1016 
1017 static int reg2offset_out(VMReg r) {
1018   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1019 }
1020 
1021 
1022 static void verify_oop_args(MacroAssembler* masm,
1023                             const methodHandle& method,
1024                             const BasicType* sig_bt,
1025                             const VMRegPair* regs) {
1026   Register temp_reg = Rmethod;  // not part of any compiled calling seq
1027   if (VerifyOops) {
1028     for (int i = 0; i < method->size_of_parameters(); i++) {
1029       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
1030         VMReg r = regs[i].first();
1031         assert(r->is_valid(), "bad oop arg");
1032         if (r->is_stack()) {
1033           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1034           __ verify_oop(temp_reg);
1035         } else {
1036           __ verify_oop(r->as_Register());
1037         }
1038       }
1039     }
1040   }
1041 }
1042 
1043 static void gen_special_dispatch(MacroAssembler* masm,
1044                                  const methodHandle& method,
1045                                  const BasicType* sig_bt,
1046                                  const VMRegPair* regs) {
1047   verify_oop_args(masm, method, sig_bt, regs);
1048   vmIntrinsics::ID iid = method->intrinsic_id();
1049 
1050   // Now write the args into the outgoing interpreter space
1051   bool     has_receiver   = false;
1052   Register receiver_reg   = noreg;
1053   int      member_arg_pos = -1;
1054   Register member_reg     = noreg;
1055   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1056   if (ref_kind != 0) {
1057     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1058     member_reg = Rmethod;  // known to be free at this point
1059     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1060   } else if (iid == vmIntrinsics::_invokeBasic) {
1061     has_receiver = true;
1062   } else {
1063     fatal("unexpected intrinsic id %d", iid);
1064   }
1065 
1066   if (member_reg != noreg) {
1067     // Load the member_arg into register, if necessary.
1068     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1069     VMReg r = regs[member_arg_pos].first();
1070     if (r->is_stack()) {
1071       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1072     } else {
1073       // no data motion is needed
1074       member_reg = r->as_Register();
1075     }
1076   }
1077 
1078   if (has_receiver) {
1079     // Make sure the receiver is loaded into a register.
1080     assert(method->size_of_parameters() > 0, "oob");
1081     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1082     VMReg r = regs[0].first();
1083     assert(r->is_valid(), "bad receiver arg");
1084     if (r->is_stack()) {
1085       // Porting note:  This assumes that compiled calling conventions always
1086       // pass the receiver oop in a register.  If this is not true on some
1087       // platform, pick a temp and load the receiver from stack.
1088       assert(false, "receiver always in a register");
1089       receiver_reg = j_rarg0;  // known to be free at this point
1090       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1091     } else {
1092       // no data motion is needed
1093       receiver_reg = r->as_Register();
1094     }
1095   }
1096 
1097   // Figure out which address we are really jumping to:
1098   MethodHandles::generate_method_handle_dispatch(masm, iid,
1099                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1100 }
1101 
1102 // ---------------------------------------------------------------------------
1103 // Generate a native wrapper for a given method.  The method takes arguments
1104 // in the Java compiled code convention, marshals them to the native
1105 // convention (handlizes oops, etc), transitions to native, makes the call,
// returns to Java state (possibly blocking), unhandlizes any result and
1107 // returns.
1108 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1109                                                 const methodHandle& method,
1110                                                 int compile_id,
1111                                                 BasicType* in_sig_bt,
1112                                                 VMRegPair* in_regs,
1113                                                 BasicType ret_type) {
1114   if (method->is_method_handle_intrinsic()) {
1115     vmIntrinsics::ID iid = method->intrinsic_id();
1116     intptr_t start = (intptr_t)__ pc();
1117     int vep_offset = ((intptr_t)__ pc()) - start;
1118     gen_special_dispatch(masm,
1119                          method,
1120                          in_sig_bt,
1121                          in_regs);
1122     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1123     __ flush();
1124     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1125     return nmethod::new_native_nmethod(method,
1126                                        compile_id,
1127                                        masm->code(),
1128                                        vep_offset,
1129                                        frame_complete,
1130                                        stack_slots / VMRegImpl::slots_per_word,
1131                                        in_ByteSize(-1),
1132                                        in_ByteSize(-1),
1133                                        (OopMapSet*)NULL);
1134   }
1135   // Arguments for JNI method include JNIEnv and Class if static
1136 
  // Usage of Rtemp should be OK since it is scratched by the native call
1138 
1139   bool is_static = method->is_static();
1140 
1141   const int total_in_args = method->size_of_parameters();
1142   int total_c_args = total_in_args + 1;
1143   if (is_static) {
1144     total_c_args++;
1145   }
1146 
1147   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1148   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1149 
1150   int argc = 0;
1151   out_sig_bt[argc++] = T_ADDRESS;
1152   if (is_static) {
1153     out_sig_bt[argc++] = T_OBJECT;
1154   }
1155 
1156   int i;
1157   for (i = 0; i < total_in_args; i++) {
1158     out_sig_bt[argc++] = in_sig_bt[i];
1159   }
1160 
1161   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1162   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1163   // Since object arguments need to be wrapped, we must preserve space
1164   // for those object arguments which come in registers (GPR_PARAMS maximum)
  // plus one more slot for the Klass handle (for static methods)
1166   int oop_handle_offset = stack_slots;
1167   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
1168 
1169   // Plus a lock if needed
1170   int lock_slot_offset = 0;
1171   if (method->is_synchronized()) {
1172     lock_slot_offset = stack_slots;
1173     assert(sizeof(BasicLock) == wordSize, "adjust this code");
1174     stack_slots += VMRegImpl::slots_per_word;
1175   }
1176 
1177   // Space to save return address and FP
1178   stack_slots += 2 * VMRegImpl::slots_per_word;
1179 
1180   // Calculate the final stack size taking account of alignment
1181   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
1182   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1183   int lock_slot_fp_offset = stack_size - 2 * wordSize -
1184     lock_slot_offset * VMRegImpl::stack_slot_size;
1185 
1186   // Unverified entry point
1187   address start = __ pc();
1188 
1189   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1190   const Register receiver = R0; // see receiverOpr()
1191   __ load_klass(Rtemp, receiver);
1192   __ cmp(Rtemp, Ricklass);
1193   Label verified;
1194 
1195   __ b(verified, eq); // jump over alignment no-ops too
1196   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1197   __ align(CodeEntryAlignment);
1198 
1199   // Verified entry point
1200   __ bind(verified);
1201   int vep_offset = __ pc() - start;
1202 
1203 #ifdef AARCH64
1204   // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1205   __ nop();
1206 #endif // AARCH64
1207 
1208   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
1209     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
1210     // instead of doing a full VM transition once it's been computed.
1211     Label slow_case;
1212     const Register obj_reg = R0;
1213 
    // Unlike Object.hashCode, System.identityHashCode is a static method and
    // gets the object as an argument instead of as the receiver.
1216     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1217       assert(method->is_static(), "method should be static");
1218       // return 0 for null reference input, return val = R0 = obj_reg = 0
1219 #ifdef AARCH64
1220       Label Continue;
1221       __ cbnz(obj_reg, Continue);
1222       __ ret();
1223       __ bind(Continue);
1224 #else
1225       __ cmp(obj_reg, 0);
1226       __ bx(LR, eq);
1227 #endif
1228     }
1229 
1230     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1231 
1232     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1233     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1234 
1235     if (UseBiasedLocking) {
1236       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1237       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1238     }
1239 
1240 #ifdef AARCH64
1241     __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1242     __ b(slow_case, eq);
1243     __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1244     __ ret();
1245 #else
1246     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1247     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1248     __ bx(LR, ne);
1249 #endif // AARCH64
1250 
1251     __ bind(slow_case);
1252   }
1253 
1254   // Bang stack pages
1255   __ arm_stack_overflow_check(stack_size, Rtemp);
1256 
1257   // Setup frame linkage
1258   __ raw_push(FP, LR);
1259   __ mov(FP, SP);
1260   __ sub_slow(SP, SP, stack_size - 2*wordSize);
1261 
1262   int frame_complete = __ pc() - start;
1263 
1264   OopMapSet* oop_maps = new OopMapSet();
1265   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1266   const int extra_args = is_static ? 2 : 1;
1267   int receiver_offset = -1;
1268   int fp_regs_in_arguments = 0;
1269 
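  // Walk the incoming Java arguments in reverse and shuffle them into their
  // outgoing (C) locations: oop arguments are converted to handles (the
  // address of their stack slot, or NULL for a null reference) and recorded
  // in the oop map, while floating-point arguments that stay in registers are
  // only counted (fp_regs_in_arguments) so they can be preserved around the
  // locking code if needed.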
1270   for (i = total_in_args; --i >= 0; ) {
1271     switch (in_sig_bt[i]) {
1272     case T_ARRAY:
1273     case T_OBJECT: {
1274       VMReg src = in_regs[i].first();
1275       VMReg dst = out_regs[i + extra_args].first();
1276       if (src->is_stack()) {
1277         assert(dst->is_stack(), "must be");
1278         assert(i != 0, "Incoming receiver is always in a register");
1279         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1280         __ cmp(Rtemp, 0);
1281 #ifdef AARCH64
1282         __ add(Rtemp, FP, reg2offset_in(src));
1283         __ csel(Rtemp, ZR, Rtemp, eq);
1284 #else
1285         __ add(Rtemp, FP, reg2offset_in(src), ne);
1286 #endif // AARCH64
1287         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1288         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1289         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1290       } else {
1291         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1292         __ str(src->as_Register(), Address(SP, offset));
1293         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1294         if ((i == 0) && (!is_static)) {
1295           receiver_offset = offset;
1296         }
1297         oop_handle_offset += VMRegImpl::slots_per_word;
1298 
1299 #ifdef AARCH64
1300         __ cmp(src->as_Register(), 0);
1301         __ add(Rtemp, SP, offset);
1302         __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1303         if (dst->is_stack()) {
1304           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1305         }
1306 #else
1307         if (dst->is_stack()) {
1308           __ movs(Rtemp, src->as_Register());
1309           __ add(Rtemp, SP, offset, ne);
1310           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1311         } else {
1312           __ movs(dst->as_Register(), src->as_Register());
1313           __ add(dst->as_Register(), SP, offset, ne);
1314         }
1315 #endif // AARCH64
1316       }
1317     }
1318 
1319     case T_VOID:
1320       break;
1321 
1322 #ifdef AARCH64
1323     case T_FLOAT:
1324     case T_DOUBLE: {
1325       VMReg src = in_regs[i].first();
1326       VMReg dst = out_regs[i + extra_args].first();
1327       if (src->is_stack()) {
1328         assert(dst->is_stack(), "must be");
1329         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1330         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1331       } else {
1332         assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1333         assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1334         fp_regs_in_arguments++;
1335       }
1336       break;
1337     }
1338 #else // AARCH64
1339 
1340 #ifdef __SOFTFP__
1341     case T_DOUBLE:
1342 #endif
1343     case T_LONG: {
1344       VMReg src_1 = in_regs[i].first();
1345       VMReg src_2 = in_regs[i].second();
1346       VMReg dst_1 = out_regs[i + extra_args].first();
1347       VMReg dst_2 = out_regs[i + extra_args].second();
1348 #if (ALIGN_WIDE_ARGUMENTS == 0)
      // The C convention can mix a register and a stack slot for a
      // 64-bit native argument.

      // Note: the following code should work independently of whether
      // the Java calling convention follows the C convention or whether
      // it aligns 64-bit values.
1355       if (dst_2->is_Register()) {
1356         if (src_1->as_Register() != dst_1->as_Register()) {
1357           assert(src_1->as_Register() != dst_2->as_Register() &&
1358                  src_2->as_Register() != dst_2->as_Register(), "must be");
1359           __ mov(dst_2->as_Register(), src_2->as_Register());
1360           __ mov(dst_1->as_Register(), src_1->as_Register());
1361         } else {
1362           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1363         }
1364       } else if (src_2->is_Register()) {
1365         if (dst_1->is_Register()) {
1366           // dst mixes a register and a stack slot
1367           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1368           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
1369           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1370           __ mov(dst_1->as_Register(), src_1->as_Register());
1371         } else {
1372           // registers to stack slots
1373           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1374           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1375           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1376         }
1377       } else if (src_1->is_Register()) {
1378         if (dst_1->is_Register()) {
1379           // src and dst must be R3 + stack slot
1380           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
1381           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
1382           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
1383         } else {
1384           // <R3,stack> -> <stack,stack>
1385           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
1386           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1387           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1388           __ str(LR, Address(SP, reg2offset_out(dst_2)));
1389         }
1390       } else {
1391         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1392         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1393         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1394         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1395         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1396       }
1397 #else // ALIGN_WIDE_ARGUMENTS
1398       if (src_1->is_stack()) {
1399         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1400         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1401         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1402         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1403         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1404       } else if (dst_1->is_stack()) {
1405         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1406         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1407         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1408       } else if (src_1->as_Register() == dst_1->as_Register()) {
1409         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1410       } else {
1411         assert(src_1->as_Register() != dst_2->as_Register() &&
1412                src_2->as_Register() != dst_2->as_Register(), "must be");
1413         __ mov(dst_2->as_Register(), src_2->as_Register());
1414         __ mov(dst_1->as_Register(), src_1->as_Register());
1415       }
1416 #endif // ALIGN_WIDE_ARGUMENTS
1417       break;
1418     }
1419 
1420 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1421     case T_FLOAT: {
1422       VMReg src = in_regs[i].first();
1423       VMReg dst = out_regs[i + extra_args].first();
1424       if (src->is_stack()) {
1425         assert(dst->is_stack(), "must be");
1426         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1427         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1428       } else if (dst->is_stack()) {
1429         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1430       } else {
1431         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1432         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1433       }
1434       break;
1435     }
1436 
1437     case T_DOUBLE: {
1438       VMReg src_1 = in_regs[i].first();
1439       VMReg src_2 = in_regs[i].second();
1440       VMReg dst_1 = out_regs[i + extra_args].first();
1441       VMReg dst_2 = out_regs[i + extra_args].second();
1442       if (src_1->is_stack()) {
1443         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1444         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1445         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1446         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1447         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1448       } else if (dst_1->is_stack()) {
1449         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1450         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1451 #if (ALIGN_WIDE_ARGUMENTS == 0)
1452       } else if (dst_2->is_stack()) {
1453         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1454         // double register must go into R3 + one stack slot
1455         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1456         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1457 #endif
1458       } else {
1459         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1460         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1461       }
1462       break;
1463     }
1464 #endif // !__SOFTFP__ && !__ABI_HARD__
1465 
1466 #ifdef __ABI_HARD__
1467     case T_FLOAT: {
1468       VMReg src = in_regs[i].first();
1469       VMReg dst = out_regs[i + extra_args].first();
1470       if (src->is_stack()) {
1471         if (dst->is_stack()) {
1472           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1473           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1474         } else {
1475           // C2 Java calling convention does not populate S14 and S15, therefore
1476           // those need to be loaded from stack here
1477           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1478           fp_regs_in_arguments++;
1479         }
1480       } else {
1481         assert(src->is_FloatRegister(), "must be");
1482         fp_regs_in_arguments++;
1483       }
1484       break;
1485     }
1486     case T_DOUBLE: {
1487       VMReg src_1 = in_regs[i].first();
1488       VMReg src_2 = in_regs[i].second();
1489       VMReg dst_1 = out_regs[i + extra_args].first();
1490       VMReg dst_2 = out_regs[i + extra_args].second();
1491       if (src_1->is_stack()) {
1492         if (dst_1->is_stack()) {
1493           assert(dst_2->is_stack(), "must be");
1494           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1495           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1496           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1497           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1498         } else {
1499           // C2 Java calling convention does not populate S14 and S15, therefore
1500           // those need to be loaded from stack here
1501           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1502           fp_regs_in_arguments += 2;
1503         }
1504       } else {
1505         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1506         fp_regs_in_arguments += 2;
1507       }
1508       break;
1509     }
1510 #endif // __ABI_HARD__
1511 #endif // AARCH64
1512 
1513     default: {
1514       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1515       VMReg src = in_regs[i].first();
1516       VMReg dst = out_regs[i + extra_args].first();
1517       if (src->is_stack()) {
1518         assert(dst->is_stack(), "must be");
1519         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1520         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1521       } else if (dst->is_stack()) {
1522         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1523       } else {
1524         assert(src->is_Register() && dst->is_Register(), "must be");
1525         __ mov(dst->as_Register(), src->as_Register());
1526       }
1527     }
1528     }
1529   }
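
       // Note on the shuffling loop above: reg2offset_in() gives offsets into the
       // incoming (Java) argument area addressed via FP, while reg2offset_out()
       // gives offsets into the outgoing native argument area addressed via SP.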
1530 
1531   // Get Klass mirror
1532   int klass_offset = -1;
1533   if (is_static) {
1534     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1535     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1536     __ add(c_rarg1, SP, klass_offset);
1537     __ str(Rtemp, Address(SP, klass_offset));
1538     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1539   }
1540 
1541   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1542   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1543   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1544   oop_maps->add_gc_map(pc_offset, map);
1545 
1546 #ifndef AARCH64
1547   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1548   __ membar(MacroAssembler::StoreStore, Rtemp);
1549 #endif // !AARCH64
1550 
1551   // RedefineClasses() tracing support for obsolete method entry
1552   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1553 #ifdef AARCH64
1554     __ NOT_TESTED();
1555 #endif
1556     __ save_caller_save_registers();
1557     __ mov(R0, Rthread);
1558     __ mov_metadata(R1, method());
1559     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1560     __ restore_caller_save_registers();
1561   }
1562 
1563   const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1564   const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1565   const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1566   const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
1567 
1568   Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
1569   if (method->is_synchronized()) {
1570     // The first argument is a handle to the sync object (a class or an instance)
1571     __ ldr(sync_obj, Address(R1));
1572     // Remember the handle for the unlocking code
1573     __ mov(sync_handle, R1);
1574 
1575     if (UseBiasedLocking) {
1576       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1577     }
1578 
1579     const Register mark = tmp;
1580 #ifdef AARCH64
1581     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1582     assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1583 
1584     __ ldr(mark, sync_obj);
1585 
1586     // Test if object is already locked
1587     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1588     __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1589 
1590     // Check for recursive lock
1591     // See comments in InterpreterMacroAssembler::lock_object for
1592     // explanations on the fast recursive locking check.
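         // The mask check below verifies in one step that the low 2 bits of
         // (mark - SP) are clear and that the stack-locked header points within
         // about one page of SP, i.e. into this thread's stack: the recursive
         // locking case.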
1593     __ mov(Rtemp, SP);
1594     __ sub(Rtemp, mark, Rtemp);
1595     intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1596     Assembler::LogicalImmediate imm(mask, false);
1597     __ ands(Rtemp, Rtemp, imm);
1598     __ b(slow_lock, ne);
1599 
1600     // Recursive locking: store 0 into a lock record
1601     __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1602     __ b(lock_done);
1603 
1604     __ bind(fast_lock);
1605     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1606 
1607     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1608 #else
1609     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1610     // That is acceptable: either the CAS or the slow-case path is taken in that case.
1611 
1612     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1613     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1614     __ tst(mark, markOopDesc::unlocked_value);
1615     __ b(fast_lock, ne);
1616 
1617     // Check for recursive lock
1618     // See comments in InterpreterMacroAssembler::lock_object for
1619     // explanations on the fast recursive locking check.
1620     // Check independently the low bits and the distance to SP
1621     // -1- test low 2 bits
1622     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1623     // -2- test (hdr - SP) if the low two bits are 0
1624     __ sub(Rtemp, mark, SP, eq);
1625     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1626     // If still 'eq' then recursive locking OK: set displaced header to 0
1627     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1628     __ b(lock_done, eq);
1629     __ b(slow_lock);
1630 
1631     __ bind(fast_lock);
1632     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1633 
1634     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1635 #endif // AARCH64
1636 
1637     __ bind(lock_done);
1638   }
1639 
1640   // Get JNIEnv*
1641   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1642 
1643   // Perform thread state transition
1644   __ mov(Rtemp, _thread_in_native);
1645 #ifdef AARCH64
1646   // The stlr instruction forces all preceding writes to be observed prior to the thread state change.
1647   __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1648   __ stlr_w(Rtemp, Rtemp2);
1649 #else
1650   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1651 #endif // AARCH64
1652 
1653   // Finally, call the native method
1654   __ call(method->native_function());
1655 
1656   // Set FPSCR/FPCR to a known state
1657   if (AlwaysRestoreFPU) {
1658     __ restore_default_fp_mode();
1659   }
1660 
1661   // Do a safepoint check while thread is in transition state
1662   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1663   Label call_safepoint_runtime, return_to_java;
1664   __ mov(Rtemp, _thread_in_native_trans);
1665   __ ldr_literal(R2, safepoint_state);
1666   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1667 
1668   // Make sure the store is observed before reading the SafepointSynchronize state and before any further memory references.
1669   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1670 
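       // Take the call_safepoint_runtime path below if a safepoint is in progress
       // (state != _not_synchronized) or a suspend request is pending
       // (suspend_flags != 0); otherwise fall through to return_to_java.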
1671   __ ldr_s32(R2, Address(R2));
1672   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1673   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1674   __ cond_cmp(R3, 0, eq);
1675   __ b(call_safepoint_runtime, ne);
1676   __ bind(return_to_java);
1677 
1678   // Perform thread state transition and reguard stack yellow pages if needed
1679   Label reguard, reguard_done;
1680   __ mov(Rtemp, _thread_in_Java);
1681   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1682   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1683 
1684   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1685   __ b(reguard, eq);
1686   __ bind(reguard_done);
1687 
1688   Label slow_unlock, unlock_done, retry;
1689   if (method->is_synchronized()) {
1690     __ ldr(sync_obj, Address(sync_handle));
1691 
1692     if (UseBiasedLocking) {
1693       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1694       // disp_hdr may not have been saved on entry with biased locking
1695       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1696     }
1697 
1698     // See C1_MacroAssembler::unlock_object() for more comments
1699     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1700     __ cbz(R2, unlock_done);
1701 
1702     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1703 
1704     __ bind(unlock_done);
1705   }
1706 
1707   // Reset the last Java frame and zero the top of the JNI handle block
1708   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1709   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1710 
1711 #ifdef AARCH64
1712   __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1713   if (CheckJNICalls) {
1714     __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1715   }
1716 
1717 
1718   switch (ret_type) {
1719   case T_BOOLEAN:
1720     __ tst(R0, 0xff);
1721     __ cset(R0, ne);
1722     break;
1723   case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1724   case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1725   case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1726   case T_INT    : // fall through
1727   case T_LONG   : // fall through
1728   case T_VOID   : // fall through
1729   case T_FLOAT  : // fall through
1730   case T_DOUBLE : /* nothing to do */          break;
1731   case T_OBJECT : // fall through
1732   case T_ARRAY  : break; // See JNIHandles::resolve below
1733   default:
1734     ShouldNotReachHere();
1735   }
1736 #else
1737   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1738   if (CheckJNICalls) {
1739     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1740   }
1741 #endif // AARCH64
1742 
1743   // Unbox the oop result, i.e. resolve the JNI handle in R0 via JNIHandles::resolve.
1744   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1745     __ resolve_jobject(R0,      // value
1746                        Rtemp,   // tmp1
1747                        R1_tmp); // tmp2
1748   }
1749 
1750   // Any exception pending?
1751   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1752   __ mov(SP, FP);
1753 
1754 #ifdef AARCH64
1755   Label except;
1756   __ cbnz(Rtemp, except);
1757   __ raw_pop(FP, LR);
1758   __ ret();
1759 
1760   __ bind(except);
1761   // Pop the frame and forward the exception. Rexception_pc contains return address.
1762   __ raw_pop(FP, Rexception_pc);
1763 #else
1764   __ cmp(Rtemp, 0);
1765   // Pop the frame and return if no exception pending
1766   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1767   // Pop the frame and forward the exception. Rexception_pc contains return address.
1768   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1769   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1770 #endif // AARCH64
1771   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1772 
1773   // Safepoint operation and/or pending suspend request is in progress.
1774   // Save the return values and call the runtime function by hand.
1775   __ bind(call_safepoint_runtime);
1776   push_result_registers(masm, ret_type);
1777   __ mov(R0, Rthread);
1778   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1779   pop_result_registers(masm, ret_type);
1780   __ b(return_to_java);
1781 
1782   __ bind_literal(safepoint_state);
1783 
1784   // Reguard stack pages. Save native results around a call to C runtime.
1785   __ bind(reguard);
1786   push_result_registers(masm, ret_type);
1787   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1788   pop_result_registers(masm, ret_type);
1789   __ b(reguard_done);
1790 
1791   if (method->is_synchronized()) {
1792     // Locking slow case
1793     if (UseBiasedLocking) {
1794       __ bind(slow_lock_biased);
1795       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1796     }
1797 
1798     __ bind(slow_lock);
1799 
1800     push_param_registers(masm, fp_regs_in_arguments);
1801 
1802     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1803     __ mov(R0, sync_obj);
1804     __ mov(R1, disp_hdr);
1805     __ mov(R2, Rthread);
1806     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1807 
1808     pop_param_registers(masm, fp_regs_in_arguments);
1809 
1810     __ b(lock_done);
1811 
1812     // Unlocking slow case
1813     __ bind(slow_unlock);
1814 
1815     push_result_registers(masm, ret_type);
1816 
1817     // Clear the pending exception before reentering the VM.
1818     // The exception oop can be kept in a register since this is a leaf call.
1819     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1820     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1821     Register zero = __ zero_register(Rtemp);
1822     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1823     __ mov(R0, sync_obj);
1824     __ mov(R1, disp_hdr);
1825     __ mov(R2, Rthread);
1826     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1827     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1828 
1829     pop_result_registers(masm, ret_type);
1830 
1831     __ b(unlock_done);
1832   }
1833 
1834   __ flush();
1835   return nmethod::new_native_nmethod(method,
1836                                      compile_id,
1837                                      masm->code(),
1838                                      vep_offset,
1839                                      frame_complete,
1840                                      stack_slots / VMRegImpl::slots_per_word,
1841                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1842                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1843                                      oop_maps);
1844 }
1845 
1846 // This function returns the adjustment size (in number of words) to a c2i adapter
1847 // activation, for use during deoptimization.
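     // For example (hypothetical numbers): 2 callee parameters and 5 callee locals
     // give 3 * Interpreter::stackElementWords extra words, rounded up to the
     // stack alignment on AArch64.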
1848 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1849   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1850 #ifdef AARCH64
1851   extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1852 #endif // AARCH64
1853   return extra_locals_size;
1854 }
1855 
1856 
1857 uint SharedRuntime::out_preserve_stack_slots() {
1858   return 0;
1859 }
1860 
1861 
1862 //------------------------------generate_deopt_blob----------------------------
1863 void SharedRuntime::generate_deopt_blob() {
1864   ResourceMark rm;
1865 #ifdef AARCH64
1866   CodeBuffer buffer("deopt_blob", 1024+256, 1);
1867 #else
1868   CodeBuffer buffer("deopt_blob", 1024, 1024);
1869 #endif
1870   int frame_size_in_words;
1871   OopMapSet* oop_maps;
1872   int reexecute_offset;
1873   int exception_in_tls_offset;
1874   int exception_offset;
1875 
1876   MacroAssembler* masm = new MacroAssembler(&buffer);
1877   Label cont;
1878   const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1879   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1880   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1881   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1882 
1883   address start = __ pc();
1884 
1885   oop_maps = new OopMapSet();
1886   // LR saved by caller (can be live in c2 method)
1887 
1888   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1889   // not possible to call the deopt blob from the nmethod and pass the
1890   // address of the deopt handler of the nmethod in LR. What happens
1891   // now is that the caller of the deopt blob pushes the current
1892   // address, so the deopt blob doesn't have to do it. This way LR is
1893   // preserved: it contains the live value from the nmethod and is
1894   // saved at R14/R30_offset here.
1895   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1896   __ mov(Rkind, Deoptimization::Unpack_deopt);
1897   __ b(cont);
1898 
1899   exception_offset = __ pc() - start;
1900 
1901   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1902   // exception_in_tls_offset entry point.
1903   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1904   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1905   // Force return value to NULL to avoid confusing the escape analysis
1906   // logic. Everything is dead here anyway.
1907   __ mov(R0, 0);
1908 
1909   exception_in_tls_offset = __ pc() - start;
1910 
1911   // Exception data is in JavaThread structure
1912   // Patch the return address of the current frame
1913   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1914   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1915   {
1916     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1917     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1918   }
1919   __ mov(Rkind, Deoptimization::Unpack_exception);
1920   __ b(cont);
1921 
1922   reexecute_offset = __ pc() - start;
1923 
1924   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1925   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1926 
1927   // Calculate UnrollBlock and save the result in Rublock
1928   __ bind(cont);
1929   __ mov(R0, Rthread);
1930   __ mov(R1, Rkind);
1931 
1932   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1933   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1934   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1935   if (pc_offset == -1) {
1936     pc_offset = __ offset();
1937   }
1938   oop_maps->add_gc_map(pc_offset, map);
1939   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1940 
1941   __ mov(Rublock, R0);
1942 
1943   // Reload Rkind from the UnrollBlock (might have changed)
1944   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1945   Label noException;
1946   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1947   __ b(noException, ne);
1948   // handle exception case
1949 #ifdef ASSERT
1950   // assert that exception_pc is zero in tls
1951   { Label L;
1952     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1953     __ cbz(Rexception_pc, L);
1954     __ stop("exception pc should be null");
1955     __ bind(L);
1956   }
1957 #endif
1958   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1959   __ verify_oop(Rexception_obj);
1960   {
1961     const Register Rzero = __ zero_register(Rtemp);
1962     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1963   }
1964 
1965   __ bind(noException);
1966 
1967   // This frame is going away.  Fetch return value, so we can move it to
1968   // a new frame.
1969   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1970 #ifndef AARCH64
1971   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1972 #endif // !AARCH64
1973 #ifndef __SOFTFP__
1974   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1975 #endif
1976   // pop frame
1977   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1978 
1979   // Set initial stack state before pushing interpreter frames
1980   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1981   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1982   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1983 
1984 #ifdef AARCH64
1985   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1986   // They are needed for correct stack walking during stack overflow handling.
1987   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1988   __ sub(Rtemp, Rtemp, 2*wordSize);
1989   __ add(SP, SP, Rtemp, ex_uxtx);
1990   __ raw_pop(FP, LR);
1991 
1992 #ifdef ASSERT
1993   { Label L;
1994     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1995     __ cmp(FP, Rtemp);
1996     __ b(L, eq);
1997     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
1998     __ bind(L);
1999   }
2000   { Label L;
2001     __ ldr(Rtemp, Address(R2));
2002     __ cmp(LR, Rtemp);
2003     __ b(L, eq);
2004     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2005     __ bind(L);
2006   }
2007 #endif // ASSERT
2008 
2009 #else
2010   __ add(SP, SP, Rtemp);
2011 #endif // AARCH64
2012 
2013 #ifdef ASSERT
2014   // Compilers generate code that bangs the stack by as much as the
2015   // interpreter would need, so this stack banging should never
2016   // trigger a fault. Verify that it does not on non-product builds.
2017   // See if there is enough stack to push the deoptimized frames.
2018   if (UseStackBanging) {
2019 #ifndef AARCH64
2020     // The compiled method that we are deoptimizing was popped from the stack.
2021     // If the stack bang results in a stack overflow, we don't return to the
2022     // method that is being deoptimized. The stack overflow exception is
2023     // propagated to the caller of the deoptimized method. Need to get the pc
2024     // from the caller in LR and restore FP.
2025     __ ldr(LR, Address(R2, 0));
2026     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2027 #endif // !AARCH64
2028     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2029     __ arm_stack_overflow_check(R8, Rtemp);
2030   }
2031 #endif
2032   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2033 
2034 #ifndef AARCH64
2035   // Pick up the initial fp we should save
2036   // XXX Note: was ldr(FP, Address(FP));
2037 
2038   // The compiler no longer uses FP as a frame pointer for the
2039   // compiled code. It can be used by the allocator in C2 or to
2040   // remember the original SP for JSR292 call sites.
2041 
2042   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
2043   // Deoptimization::fetch_unroll_info computes the right FP value and
2044   // stores it in Rublock.initial_info. The same mechanism is used here on ARM.
2045   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2046 #endif // !AARCH64
2047 
2048   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2049   __ mov(Rsender, SP);
2050 #ifdef AARCH64
2051   __ sub(SP, SP, Rtemp, ex_uxtx);
2052 #else
2053   __ sub(SP, SP, Rtemp);
2054 #endif // AARCH64
2055 
2056   // Push interpreter frames in a loop
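       // R2 points to the array of frame pcs and R3 to the array of frame sizes
       // (both loaded from the UnrollBlock above); R8 holds the number of frames
       // to push.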
2057   Label loop;
2058   __ bind(loop);
2059   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2060   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2061 
2062   __ raw_push(FP, LR);                                     // create new frame
2063   __ mov(FP, SP);
2064   __ sub(Rtemp, Rtemp, 2*wordSize);
2065 
2066 #ifdef AARCH64
2067   __ sub(SP, SP, Rtemp, ex_uxtx);
2068 #else
2069   __ sub(SP, SP, Rtemp);
2070 #endif // AARCH64
2071 
2072   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2073 #ifdef AARCH64
2074   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2075 #else
2076   __ mov(LR, 0);
2077   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2078 #endif // AARCH64
2079 
2080   __ subs(R8, R8, 1);                               // decrement counter
2081   __ mov(Rsender, SP);
2082   __ b(loop, ne);
2083 
2084   // Re-push self-frame
2085   __ ldr(LR, Address(R2));
2086   __ raw_push(FP, LR);
2087   __ mov(FP, SP);
2088   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
2089 
2090   // Restore frame locals after moving the frame
2091   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2092 #ifndef AARCH64
2093   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2094 #endif // !AARCH64
2095 
2096 #ifndef __SOFTFP__
2097   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2098 #endif // !__SOFTFP__
2099 
2100 #ifndef AARCH64
2101 #ifdef ASSERT
2102   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2103   { Label L;
2104     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2105     __ cmp_32(Rkind, Rtemp);
2106     __ b(L, eq);
2107     __ stop("Rkind was overwritten");
2108     __ bind(L);
2109   }
2110 #endif
2111 #endif
2112 
2113   // Call unpack_frames with proper arguments
2114   __ mov(R0, Rthread);
2115   __ mov(R1, Rkind);
2116 
2117   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2118   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2119   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2120   if (pc_offset == -1) {
2121     pc_offset = __ offset();
2122   }
2123   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2124   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2125 
2126   // Collect return values, pop self-frame and jump to interpreter
2127   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2128 #ifndef AARCH64
2129   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2130 #endif // !AARCH64
2131   // Interpreter floats are controlled by __SOFTFP__, but compiler
2132   // float return value registers are controlled by __ABI_HARD__.
2133   // This matters for vfp-sflt builds.
2134 #ifndef __SOFTFP__
2135   // Interpreter hard float
2136 #ifdef __ABI_HARD__
2137   // Compiler float return value in FP registers
2138   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2139 #else
2140   // Compiler float return value in integer registers,
2141   // copy to D0 for interpreter (S0 <-- R0)
2142   __ fmdrr(D0_tos, R0, R1);
2143 #endif
2144 #endif // !__SOFTFP__
2145   __ mov(SP, FP);
2146 
2147 #ifdef AARCH64
2148   __ raw_pop(FP, LR);
2149   __ ret();
2150 #else
2151   __ pop(RegisterSet(FP) | RegisterSet(PC));
2152 #endif // AARCH64
2153 
2154   __ flush();
2155 
2156   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2157                                            reexecute_offset, frame_size_in_words);
2158   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2159 }
2160 
2161 #ifdef COMPILER2
2162 
2163 //------------------------------generate_uncommon_trap_blob--------------------
2164 // Ought to generate an ideal graph & compile, but here's some hand-coded
2165 // ARM assembly instead.
2166 void SharedRuntime::generate_uncommon_trap_blob() {
2167   // allocate space for the code
2168   ResourceMark rm;
2169 
2170   // setup code generation tools
2171   int pad = VerifyThread ? 512 : 0;
2172 #ifdef _LP64
2173   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2174 #else
2175   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2176   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2177   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2178 #endif
2179   // bypassed when code generation is useless
2180   MacroAssembler* masm               = new MacroAssembler(&buffer);
2181   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2182   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2183   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2184 
2185   //
2186   // This is the entry point for all traps the compiler takes when it thinks
2187   // it cannot handle further execution of compiled code. The frame is
2188   // deoptimized in these cases and converted into interpreter frames for
2189   // execution.
2190   // The steps taken by this frame are as follows:
2191   //   - push a fake "unpack_frame"
2192   //   - call the C routine Deoptimization::uncommon_trap (this function
2193   //     packs the current compiled frame into vframe arrays and returns
2194   //     information about the number and size of interpreter frames which
2195   //     are equivalent to the frame which is being deoptimized)
2196   //   - deallocate the "unpack_frame"
2197   //   - deallocate the deoptimization frame
2198   //   - in a loop using the information returned in the previous step
2199   //     push interpreter frames;
2200   //   - create a dummy "unpack_frame"
2201   //   - call the C routine: Deoptimization::unpack_frames (this function
2202   //     lays out values on the interpreter frame which was just created)
2203   //   - deallocate the dummy unpack_frame
2204   //   - return to the interpreter entry point
2205   //
2206   //  Refer to the following methods for more information:
2207   //   - Deoptimization::uncommon_trap
2208   //   - Deoptimization::unpack_frames
2209 
2210   // the unloaded class index is in R0 (first parameter to this blob)
2211 
2212   __ raw_push(FP, LR);
2213   __ set_last_Java_frame(SP, FP, false, Rtemp);
2214   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
2215   __ mov(R1, R0);
2216   __ mov(R0, Rthread);
2217   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
2218   __ mov(Rublock, R0);
2219   __ reset_last_Java_frame(Rtemp);
2220   __ raw_pop(FP, LR);
2221 
2222 #ifdef ASSERT
2223   { Label L;
2224     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2225     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2226     __ b(L, eq);
2227     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2228     __ bind(L);
2229   }
2230 #endif
2231 
2232 
2233   // Set initial stack state before pushing interpreter frames
2234   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2235   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2236   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2237 
2238 #ifdef AARCH64
2239   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2240   // They are needed for correct stack walking during stack overflow handling.
2241   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2242   __ sub(Rtemp, Rtemp, 2*wordSize);
2243   __ add(SP, SP, Rtemp, ex_uxtx);
2244   __ raw_pop(FP, LR);
2245 
2246 #ifdef ASSERT
2247   { Label L;
2248     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2249     __ cmp(FP, Rtemp);
2250     __ b(L, eq);
2251     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2252     __ bind(L);
2253   }
2254   { Label L;
2255     __ ldr(Rtemp, Address(R2));
2256     __ cmp(LR, Rtemp);
2257     __ b(L, eq);
2258     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2259     __ bind(L);
2260   }
2261 #endif // ASSERT
2262 
2263 #else
2264   __ add(SP, SP, Rtemp);
2265 #endif // AARCH64
2266 
2267   // See if there is enough stack to push the deoptimized frames.
2268 #ifdef ASSERT
2269   // Compilers generate code that bangs the stack by as much as the
2270   // interpreter would need, so this stack banging should never
2271   // trigger a fault. Verify that it does not on non-product builds.
2272   if (UseStackBanging) {
2273 #ifndef AARCH64
2274     // The compiled method that we are deoptimizing was popped from the stack.
2275     // If the stack bang results in a stack overflow, we don't return to the
2276     // method that is being deoptimized. The stack overflow exception is
2277     // propagated to the caller of the deoptimized method. Need to get the pc
2278     // from the caller in LR and restore FP.
2279     __ ldr(LR, Address(R2, 0));
2280     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2281 #endif // !AARCH64
2282     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2283     __ arm_stack_overflow_check(R8, Rtemp);
2284   }
2285 #endif
2286   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2287   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2288   __ mov(Rsender, SP);
2289 #ifdef AARCH64
2290   __ sub(SP, SP, Rtemp, ex_uxtx);
2291 #else
2292   __ sub(SP, SP, Rtemp);
2293 #endif
2294 #ifndef AARCH64
2295   //  __ ldr(FP, Address(FP));
2296   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2297 #endif // !AARCH64
2298 
2299   // Push interpreter frames in a loop
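       // R2 points to the array of frame pcs and R3 to the array of frame sizes
       // (both loaded from the UnrollBlock above); R8 holds the number of frames
       // to push.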
2300   Label loop;
2301   __ bind(loop);
2302   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2303   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2304 
2305   __ raw_push(FP, LR);                                     // create new frame
2306   __ mov(FP, SP);
2307   __ sub(Rtemp, Rtemp, 2*wordSize);
2308 
2309 #ifdef AARCH64
2310   __ sub(SP, SP, Rtemp, ex_uxtx);
2311 #else
2312   __ sub(SP, SP, Rtemp);
2313 #endif // AARCH64
2314 
2315   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2316 #ifdef AARCH64
2317   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2318 #else
2319   __ mov(LR, 0);
2320   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2321 #endif // AARCH64
2322   __ subs(R8, R8, 1);                               // decrement counter
2323   __ mov(Rsender, SP);
2324   __ b(loop, ne);
2325 
2326   // Re-push self-frame
2327   __ ldr(LR, Address(R2));
2328   __ raw_push(FP, LR);
2329   __ mov(FP, SP);
2330 
2331   // Call unpack_frames with proper arguments
2332   __ mov(R0, Rthread);
2333   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2334   __ set_last_Java_frame(SP, FP, false, Rtemp);
2335   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2336   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2337   __ reset_last_Java_frame(Rtemp);
2338 
2339   __ mov(SP, FP);
2340 #ifdef AARCH64
2341   __ raw_pop(FP, LR);
2342   __ ret();
2343 #else
2344   __ pop(RegisterSet(FP) | RegisterSet(PC));
2345 #endif
2346 
2347   masm->flush();
2348   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2349 }
2350 
2351 #endif // COMPILER2
2352 
2353 //------------------------------generate_handler_blob------
2354 //
2355 // Generate a special Compile2Runtime blob that saves all registers,
2356 // sets up an oopmap, and calls safepoint code to stop the compiled code for
2357 // a safepoint.
2358 //
2359 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2360   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2361 
2362   ResourceMark rm;
2363   CodeBuffer buffer("handler_blob", 256, 256);
2364   int frame_size_words;
2365   OopMapSet* oop_maps;
2366 
2367   bool cause_return = (poll_type == POLL_AT_RETURN);
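       // If the poll happened at a return (cause_return), LR already holds the
       // correct return address; otherwise a slot is reserved below and the saved
       // PC is patched from JavaThread::saved_exception_pc.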
2368 
2369   MacroAssembler* masm = new MacroAssembler(&buffer);
2370   address start = __ pc();
2371   oop_maps = new OopMapSet();
2372 
2373   if (!cause_return) {
2374 #ifdef AARCH64
2375     __ raw_push(LR, LR);
2376 #else
2377     __ sub(SP, SP, 4); // make room for LR which may still be live
2378                        // here if we are coming from a c2 method
2379 #endif // AARCH64
2380   }
2381 
2382   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2383   if (!cause_return) {
2384     // Update the saved PC with the correct value.
2385     // Two steps are needed because LR can be live in a c2 method.
2386     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2387     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2388   }
2389 
2390   __ mov(R0, Rthread);
2391   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2392   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2393   __ call(call_ptr);
2394   if (pc_offset == -1) {
2395     pc_offset = __ offset();
2396   }
2397   oop_maps->add_gc_map(pc_offset, map);
2398   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2399 
2400   // Check for pending exception
2401   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2402   __ cmp(Rtemp, 0);
2403 
2404 #ifdef AARCH64
2405   RegisterSaver::restore_live_registers(masm, cause_return);
2406   Register ret_addr = cause_return ? LR : Rtemp;
2407   if (!cause_return) {
2408     __ raw_pop(FP, ret_addr);
2409   }
2410 
2411   Label throw_exception;
2412   __ b(throw_exception, ne);
2413   __ br(ret_addr);
2414 
2415   __ bind(throw_exception);
2416   __ mov(Rexception_pc, ret_addr);
2417 #else // AARCH64
2418   if (!cause_return) {
2419     RegisterSaver::restore_live_registers(masm, false);
2420     __ pop(PC, eq);
2421     __ pop(Rexception_pc);
2422   } else {
2423     RegisterSaver::restore_live_registers(masm);
2424     __ bx(LR, eq);
2425     __ mov(Rexception_pc, LR);
2426   }
2427 #endif // AARCH64
2428 
2429   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2430 
2431   __ flush();
2432 
2433   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2434 }
2435 
2436 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2437   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2438 
2439   ResourceMark rm;
2440   CodeBuffer buffer(name, 1000, 512);
2441   int frame_size_words;
2442   OopMapSet *oop_maps;
2443   int frame_complete;
2444 
2445   MacroAssembler* masm = new MacroAssembler(&buffer);
2446   Label pending_exception;
2447 
2448   int start = __ offset();
2449 
2450   oop_maps = new OopMapSet();
2451   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
2452 
2453   frame_complete = __ offset();
2454 
2455   __ mov(R0, Rthread);
2456 
2457   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2458   assert(start == 0, "warning: start differs from code_begin");
2459   __ call(destination);
2460   if (pc_offset == -1) {
2461     pc_offset = __ offset();
2462   }
2463   oop_maps->add_gc_map(pc_offset, map);
2464   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2465 
2466   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
2467   __ cbnz(R1, pending_exception);
2468 
2469   // Overwrite saved register values
2470 
2471   // Place metadata result of VM call into Rmethod
2472   __ get_vm_result_2(R1, Rtemp);
2473   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
2474 
2475   // Place target address (VM call result) into Rtemp
2476   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
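       // restore_live_registers() below reloads these saved slots, so after the
       // restore Rmethod holds the metadata result and Rtemp holds the target
       // address to jump to.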
2477 
2478   RegisterSaver::restore_live_registers(masm);
2479   __ jump(Rtemp);
2480 
2481   __ bind(pending_exception);
2482 
2483   RegisterSaver::restore_live_registers(masm);
2484   const Register Rzero = __ zero_register(Rtemp);
2485   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
2486   __ mov(Rexception_pc, LR);
2487   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2488 
2489   __ flush();
2490 
2491   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2492 }