1 /*
   2  * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "runtime/vframeArray.hpp"
  37 #include "vmreg_arm.inline.hpp"
  38 #ifdef COMPILER1
  39 #include "c1/c1_Runtime1.hpp"
  40 #endif
  41 #ifdef COMPILER2
  42 #include "opto/runtime.hpp"
  43 #endif
  44 #ifdef SHARK
  45 #include "compiler/compileBroker.hpp"
  46 #include "shark/sharkCompiler.hpp"
  47 #endif
  48 
  49 #define __ masm->
  50 
  51 class RegisterSaver {
  52 public:
  53 
  54   // Special registers:
  55   //              32-bit ARM     64-bit ARM
  56   //  Rthread:       R10            R28
  57   //  LR:            R14            R30
  58 
  59   // Rthread is callee saved in the C ABI and never changed by compiled code:
  60   // no need to save it.
  61 
  // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A C2 method uses LR as a standard register, so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it's live in the method we are coming from.
  67 
  68 #ifdef AARCH64
  69 
  70   //
  // On AArch64, the register save area has the following layout:
  72   //
  73   // |---------------------|
  74   // | return address (LR) |
  75   // | FP                  |
  76   // |---------------------|
  77   // | V31                 |
  78   // | ...                 |
  79   // | V0                  |
  80   // |---------------------|
  81   // | padding             |
  82   // | R30 (LR live value) |
  83   // |---------------------|
  84   // | R27                 |
  85   // | ...                 |
  86   // | R0                  |
  87   // |---------------------| <-- SP
  88   //
  89 
  90   enum RegisterLayout {
  91     number_of_saved_gprs = 28,
  92     number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
  93     words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
  94 
  95     R0_offset  = 0,
  96     R30_offset = R0_offset + number_of_saved_gprs,
  97     D0_offset  = R30_offset + 2,
  98     FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
  99     LR_offset  = FP_offset + 1,
 100 
 101     reg_save_size = LR_offset + 1,
 102   };
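
  // Worked example (illustrative only; the actual values depend on
  // ConcreteRegisterImpl::words_per_fpr and the number of FP registers):
  //   R0_offset = 0, R30_offset = 28, D0_offset = 30;
  //   with 32 FP registers and words_per_fpr == 2:
  //     FP_offset = 30 + 32*2 = 94, LR_offset = 95, reg_save_size = 96
  //   with words_per_fpr == 1:
  //     FP_offset = 30 + 32 = 62, LR_offset = 63, reg_save_size = 64
  // In both cases reg_save_size is even, so the saved frame keeps the
  // 16-byte stack alignment checked in save_live_registers().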
 103 
 104   static const int Rmethod_offset;
 105   static const int Rtemp_offset;
 106 
 107 #else
 108 
 109   enum RegisterLayout {
 110     fpu_save_size = FloatRegisterImpl::number_of_registers,
 111 #ifndef __SOFTFP__
 112     D0_offset = 0,
 113 #endif
 114     R0_offset = fpu_save_size,
 115     R1_offset,
 116     R2_offset,
 117     R3_offset,
 118     R4_offset,
 119     R5_offset,
 120     R6_offset,
 121 #if (FP_REG_NUM != 7)
 122     // if not saved as FP
 123     R7_offset,
 124 #endif
 125     R8_offset,
 126     R9_offset,
 127 #if (FP_REG_NUM != 11)
 128     // if not saved as FP
 129     R11_offset,
 130 #endif
 131     R12_offset,
 132     R14_offset,
 133     FP_offset,
 134     LR_offset,
 135     reg_save_size,
 136 
 137     Rmethod_offset = R9_offset,
 138     Rtemp_offset = R12_offset,
 139   };
 140 
  // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  // (altFP_7_11 is the one of R7 and R11 that is not FP)
 143 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
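
  // Resulting 32-bit save area, sketched in the same style as the AArch64
  // layout above (exact contents depend on FP_REG_NUM and VFP support):
  //
  // |---------------------|
  // | return address (LR) |
  // | FP                  |
  // |---------------------|
  // | R14, R12, ...       |
  // | ...                 |
  // | R0                  |  (SAVED_BASE_REGS)
  // |---------------------|
  // | D31..D0 or D15..D0  |  (or just reserved space without VFP)
  // |---------------------| <-- SP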
 144 
 145 #endif // AARCH64
 146 
  //  When LR may be live in the nmethod from which we are coming,
  //  lr_saved is true: the caller has already saved the return address
  //  before the call to save_live_registers, and LR still contains the
  //  live value.
 151 
 152   static OopMap* save_live_registers(MacroAssembler* masm,
 153                                      int* total_frame_words,
 154                                      bool lr_saved = false);
 155   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 156 
 157 };
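
// Typical usage (a sketch, not taken from a particular stub): a runtime stub
// saves the complete register state, calls into the VM, then restores it:
//
//   int frame_size_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
//   // ... pass arguments and call the runtime entry ...
//   RegisterSaver::restore_live_registers(masm);
//
// The returned OopMap records where each register was saved, so a GC that
// happens during the runtime call can locate and update oops held in
// registers at the call site.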
 158 
 159 
 160 #ifdef AARCH64
 161 const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
 162 const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
 163 #endif // AARCH64
 164 
 165 
 166 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 167                                            int* total_frame_words,
 168                                            bool lr_saved) {
 169   *total_frame_words = reg_save_size;
 170 
 171   OopMapSet *oop_maps = new OopMapSet();
 172   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 173 
 174 #ifdef AARCH64
 175   assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
 176 
 177   if (lr_saved) {
    // LR was stashed here so that the jump could use it as a scratch register
 179     __ ldr(LR, Address(SP, 0));
 180     // There are two words on the stack top:
 181     //  [SP + 0]: placeholder for FP
 182     //  [SP + wordSize]: saved return address
 183     __ str(FP, Address(SP, 0));
 184   } else {
 185     __ raw_push(FP, LR);
 186   }
 187 
 188   __ sub(SP, SP, (reg_save_size - 2) * wordSize);
 189 
 190   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 191     int offset = R0_offset + i;
 192     __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
 193     map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
 194     map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
 195   }
 196 
 197   __ str(R30, Address(SP, R30_offset * wordSize));
 198   map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
 199 
 200   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 201     int offset1 = D0_offset + i * words_per_fpr;
 202     int offset2 = offset1 + words_per_fpr;
 203     Address base(SP, offset1 * wordSize);
 204     if (words_per_fpr == 2) {
 205       // pair of "wide" quad vector registers
 206       __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 207     } else {
 208       // pair of double vector registers
 209       __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 210     }
 211     map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
 212     map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
 213   }
 214 #else
 215   if (lr_saved) {
 216     __ push(RegisterSet(FP));
 217   } else {
 218     __ push(RegisterSet(FP) | RegisterSet(LR));
 219   }
 220   __ push(SAVED_BASE_REGS);
 221   if (HaveVFP) {
 222     if (VM_Version::has_vfp3_32()) {
 223       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 224     } else {
 225       if (FloatRegisterImpl::number_of_registers > 32) {
 226         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 227         __ sub(SP, SP, 32 * wordSize);
 228       }
 229     }
 230     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 231   } else {
 232     __ sub(SP, SP, fpu_save_size * wordSize);
 233   }
 234 
 235   int i;
 236   int j=0;
 237   for (i = R0_offset; i <= R9_offset; i++) {
 238     if (j == FP_REG_NUM) {
 239       // skip the FP register, managed below.
 240       j++;
 241     }
 242     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 243     j++;
 244   }
 245   assert(j == R10->encoding(), "must be");
 246 #if (FP_REG_NUM != 11)
 247   // add R11, if not managed as FP
 248   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 249 #endif
 250   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 251   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 252   if (HaveVFP) {
 253     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 254       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 255       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 256     }
 257   }
 258 #endif // AARCH64
 259 
 260   return map;
 261 }
 262 
 263 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 264 #ifdef AARCH64
 265   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 266     __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
 267   }
 268 
 269   __ ldr(R30, Address(SP, R30_offset * wordSize));
 270 
 271   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 272     Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
 273     if (words_per_fpr == 2) {
 274       // pair of "wide" quad vector registers
 275       __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 276     } else {
 277       // pair of double vector registers
 278       __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 279     }
 280   }
 281 
 282   __ add(SP, SP, (reg_save_size - 2) * wordSize);
 283 
 284   if (restore_lr) {
 285     __ raw_pop(FP, LR);
 286   } else {
 287     __ ldr(FP, Address(SP, 0));
 288   }
 289 #else
 290   if (HaveVFP) {
 291     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 292     if (VM_Version::has_vfp3_32()) {
 293       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 294     } else {
 295       if (FloatRegisterImpl::number_of_registers > 32) {
 296         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 297         __ add(SP, SP, 32 * wordSize);
 298       }
 299     }
 300   } else {
 301     __ add(SP, SP, fpu_save_size * wordSize);
 302   }
 303   __ pop(SAVED_BASE_REGS);
 304   if (restore_lr) {
 305     __ pop(RegisterSet(FP) | RegisterSet(LR));
 306   } else {
 307     __ pop(RegisterSet(FP));
 308   }
 309 #endif // AARCH64
 310 }
 311 
 312 #ifdef AARCH64
 313 
 314 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 315   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 316     __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
 317   } else {
 318     __ raw_push(R0, ZR);
 319   }
 320 }
 321 
 322 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 323   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 324     __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
 325   } else {
 326     __ raw_pop(R0, ZR);
 327   }
 328 }
 329 
 330 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 331   __ raw_push(R0, R1);
 332   __ raw_push(R2, R3);
 333   __ raw_push(R4, R5);
 334   __ raw_push(R6, R7);
 335 
 336   assert(FPR_PARAMS == 8, "adjust this code");
 337   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 338 
 339   if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
 340   if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
 341   if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
 342   if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
 343 }
 344 
 345 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 346   assert(FPR_PARAMS == 8, "adjust this code");
 347   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 348 
 349   if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
 350   if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
 351   if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
 352   if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
 353 
 354   __ raw_pop(R6, R7);
 355   __ raw_pop(R4, R5);
 356   __ raw_pop(R2, R3);
 357   __ raw_pop(R0, R1);
 358 }
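
// Note: pop_param_registers() must exactly mirror push_param_registers():
// the same fp_regs_in_arguments value has to be passed to both, and the
// FP argument pairs are popped in the reverse of the order in which they
// were pushed; otherwise SP would end up unbalanced.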
 359 
 360 #else // AARCH64
 361 
 362 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 363 #ifdef __ABI_HARD__
 364   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 365     __ sub(SP, SP, 8);
 366     __ fstd(D0, Address(SP));
 367     return;
 368   }
 369 #endif // __ABI_HARD__
 370   __ raw_push(R0, R1);
 371 }
 372 
 373 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 374 #ifdef __ABI_HARD__
 375   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 376     __ fldd(D0, Address(SP));
 377     __ add(SP, SP, 8);
 378     return;
 379   }
 380 #endif // __ABI_HARD__
 381   __ raw_pop(R0, R1);
 382 }
 383 
 384 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 385   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 386   __ push(RegisterSet(R0, R3));
 387 
 388 #ifdef __ABI_HARD__
 389   // preserve arguments
  // Likely not needed, as the locking code probably won't modify volatile FP registers,
  // but there is no way to guarantee that.
 392   if (fp_regs_in_arguments) {
 393     // convert fp_regs_in_arguments to a number of double registers
 394     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 395     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 396   }
#endif // __ABI_HARD__
 398 }
 399 
 400 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 401 #ifdef __ABI_HARD__
 402   if (fp_regs_in_arguments) {
 403     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 404     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 405   }
 406 #endif // __ABI_HARD__
 407 
 408   __ pop(RegisterSet(R0, R3));
 409 }
 410 
 411 #endif // AARCH64
 412 
 413 
// Is the vector's size (in bytes) bigger than the size saved by default?
// All vector registers are saved by default on ARM.
 416 bool SharedRuntime::is_wide_vector(int size) {
 417   return false;
 418 }
 419 
 420 size_t SharedRuntime::trampoline_size() {
 421   return 16;
 422 }
 423 
 424 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 425   InlinedAddress dest(destination);
 426   __ indirect_jump(dest, Rtemp);
 427   __ bind_literal(dest);
 428 }
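
// Sketch of the emitted trampoline (the exact instruction selection is up to
// MacroAssembler::indirect_jump(); this is not a literal disassembly):
//
//   load Rtemp (or PC) from the inlined literal below
//   jump through Rtemp
//   <destination address literal, emitted by bind_literal()>
//
// trampoline_size() must be an upper bound on this sequence; 16 bytes covers
// it on both 32-bit ARM and AArch64.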
 429 
 430 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 431                                         VMRegPair *regs,
 432                                         VMRegPair *regs2,
 433                                         int total_args_passed) {
 434   assert(regs2 == NULL, "not needed on arm");
 435 #ifdef AARCH64
 436   int slot = 0; // counted in 32-bit VMReg slots
 437   int reg = 0;
 438   int fp_reg = 0;
 439   for (int i = 0; i < total_args_passed; i++) {
 440     switch (sig_bt[i]) {
 441     case T_SHORT:
 442     case T_CHAR:
 443     case T_BYTE:
 444     case T_BOOLEAN:
 445     case T_INT:
 446       if (reg < GPR_PARAMS) {
 447         Register r = as_Register(reg);
 448         regs[i].set1(r->as_VMReg());
 449         reg++;
 450       } else {
 451         regs[i].set1(VMRegImpl::stack2reg(slot));
 452         slot+=2;
 453       }
 454       break;
 455     case T_LONG:
 456       assert(sig_bt[i+1] == T_VOID, "missing Half" );
 457       // fall through
 458     case T_ARRAY:
 459     case T_OBJECT:
 460     case T_ADDRESS:
 461       if (reg < GPR_PARAMS) {
 462         Register r = as_Register(reg);
 463         regs[i].set2(r->as_VMReg());
 464         reg++;
 465       } else {
 466         regs[i].set2(VMRegImpl::stack2reg(slot));
 467         slot+=2;
 468       }
 469       break;
 470     case T_FLOAT:
 471       if (fp_reg < FPR_PARAMS) {
 472         FloatRegister r = as_FloatRegister(fp_reg);
 473         regs[i].set1(r->as_VMReg());
 474         fp_reg++;
 475       } else {
 476         regs[i].set1(VMRegImpl::stack2reg(slot));
 477         slot+=2;
 478       }
 479       break;
 480     case T_DOUBLE:
 481       assert(sig_bt[i+1] == T_VOID, "missing Half" );
 482       if (fp_reg < FPR_PARAMS) {
 483         FloatRegister r = as_FloatRegister(fp_reg);
 484         regs[i].set2(r->as_VMReg());
 485         fp_reg++;
 486       } else {
 487         regs[i].set2(VMRegImpl::stack2reg(slot));
 488         slot+=2;
 489       }
 490       break;
 491     case T_VOID:
 492       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 493       regs[i].set_bad();
 494       break;
 495     default:
 496       ShouldNotReachHere();
 497     }
 498   }
 499   return slot;
 500 
 501 #else // AARCH64
 502 
 503   int slot = 0;
 504   int ireg = 0;
 505 #ifdef __ABI_HARD__
 506   int fp_slot = 0;
 507   int single_fpr_slot = 0;
 508 #endif // __ABI_HARD__
 509   for (int i = 0; i < total_args_passed; i++) {
 510     switch (sig_bt[i]) {
 511     case T_SHORT:
 512     case T_CHAR:
 513     case T_BYTE:
 514     case T_BOOLEAN:
 515     case T_INT:
 516     case T_ARRAY:
 517     case T_OBJECT:
 518     case T_ADDRESS:
 519 #ifndef __ABI_HARD__
 520     case T_FLOAT:
 521 #endif // !__ABI_HARD__
 522       if (ireg < 4) {
 523         Register r = as_Register(ireg);
 524         regs[i].set1(r->as_VMReg());
 525         ireg++;
 526       } else {
 527         regs[i].set1(VMRegImpl::stack2reg(slot));
 528         slot++;
 529       }
 530       break;
 531     case T_LONG:
 532 #ifndef __ABI_HARD__
 533     case T_DOUBLE:
 534 #endif // !__ABI_HARD__
 535       assert(sig_bt[i+1] == T_VOID, "missing Half" );
 536       if (ireg <= 2) {
 537 #if (ALIGN_WIDE_ARGUMENTS == 1)
 538         if(ireg & 1) ireg++;  // Aligned location required
 539 #endif
 540         Register r1 = as_Register(ireg);
 541         Register r2 = as_Register(ireg + 1);
 542         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 543         ireg += 2;
 544 #if (ALIGN_WIDE_ARGUMENTS == 0)
 545       } else if (ireg == 3) {
 546         // uses R3 + one stack slot
 547         Register r = as_Register(ireg);
 548         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 549         ireg += 1;
 550         slot += 1;
 551 #endif
 552       } else {
 553         if (slot & 1) slot++; // Aligned location required
 554         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 555         slot += 2;
 556         ireg = 4;
 557       }
 558       break;
 559     case T_VOID:
 560       regs[i].set_bad();
 561       break;
 562 #ifdef __ABI_HARD__
 563     case T_FLOAT:
 564       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 565         if ((single_fpr_slot & 1) == 0) {
 566           single_fpr_slot = fp_slot;
 567           fp_slot += 2;
 568         }
 569         FloatRegister r = as_FloatRegister(single_fpr_slot);
 570         single_fpr_slot++;
 571         regs[i].set1(r->as_VMReg());
 572       } else {
 573         regs[i].set1(VMRegImpl::stack2reg(slot));
 574         slot++;
 575       }
 576       break;
 577     case T_DOUBLE:
 578       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 579       if (fp_slot <= 14) {
 580         FloatRegister r1 = as_FloatRegister(fp_slot);
 581         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 582         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 583         fp_slot += 2;
 584       } else {
 585         if(slot & 1) slot++;
 586         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 587         slot += 2;
 588         single_fpr_slot = 16;
 589       }
 590       break;
 591 #endif // __ABI_HARD__
 592     default:
 593       ShouldNotReachHere();
 594     }
 595   }
 596   return slot;
 597 #endif // AARCH64
 598 }
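
// Worked example (illustrative only): for a C signature of
// (jint, jlong, jfloat, jdouble) with all argument registers available:
//   - AArch64:             R0, R1, V0, V1
//   - 32-bit __ABI_HARD__: R0, R2:R3 (64-bit values register-aligned),
//                          S0, then D1 (= S2:S3) for the double
//   - 32-bit __SOFTFP__:   FP values travel in core registers and stack
//                          slots exactly like integers of the same size
// Arguments that no longer fit in registers are assigned 32-bit stack
// slots counted by 'slot', which is the value this function returns.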
 599 
 600 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 601                                            VMRegPair *regs,
 602                                            int total_args_passed,
 603                                            int is_outgoing) {
 604 #ifdef AARCH64
 605   // C calling convention on AArch64 is good enough.
 606   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 607 #else
 608 #ifdef __SOFTFP__
 609   // soft float is the same as the C calling convention.
 610   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 611 #endif // __SOFTFP__
 612   (void) is_outgoing;
 613   int slot = 0;
 614   int ireg = 0;
 615   int freg = 0;
 616   int single_fpr = 0;
 617 
 618   for (int i = 0; i < total_args_passed; i++) {
 619     switch (sig_bt[i]) {
 620     case T_SHORT:
 621     case T_CHAR:
 622     case T_BYTE:
 623     case T_BOOLEAN:
 624     case T_INT:
 625     case T_ARRAY:
 626     case T_OBJECT:
 627     case T_ADDRESS:
 628       if (ireg < 4) {
 629         Register r = as_Register(ireg++);
 630         regs[i].set1(r->as_VMReg());
 631       } else {
 632         regs[i].set1(VMRegImpl::stack2reg(slot++));
 633       }
 634       break;
 635     case T_FLOAT:
 636       // C2 utilizes S14/S15 for mem-mem moves
 637       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 638         if ((single_fpr & 1) == 0) {
 639           single_fpr = freg;
 640           freg += 2;
 641         }
 642         FloatRegister r = as_FloatRegister(single_fpr++);
 643         regs[i].set1(r->as_VMReg());
 644       } else {
 645         regs[i].set1(VMRegImpl::stack2reg(slot++));
 646       }
 647       break;
 648     case T_DOUBLE:
 649       // C2 utilizes S14/S15 for mem-mem moves
 650       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 651         FloatRegister r1 = as_FloatRegister(freg);
 652         FloatRegister r2 = as_FloatRegister(freg + 1);
 653         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 654         freg += 2;
 655       } else {
 656         // Keep internally the aligned calling convention,
 657         // ignoring ALIGN_WIDE_ARGUMENTS
 658         if (slot & 1) slot++;
 659         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 660         slot += 2;
 661         single_fpr = 16;
 662       }
 663       break;
 664     case T_LONG:
 665       // Keep internally the aligned calling convention,
 666       // ignoring ALIGN_WIDE_ARGUMENTS
 667       if (ireg <= 2) {
 668         if (ireg & 1) ireg++;
 669         Register r1 = as_Register(ireg);
 670         Register r2 = as_Register(ireg + 1);
 671         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 672         ireg += 2;
 673       } else {
 674         if (slot & 1) slot++;
 675         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 676         slot += 2;
 677         ireg = 4;
 678       }
 679       break;
 680     case T_VOID:
 681       regs[i].set_bad();
 682       break;
 683     default:
 684       ShouldNotReachHere();
 685     }
 686   }
 687 
 688   if (slot & 1) slot++;
 689   return slot;
 690 #endif // AARCH64
 691 }
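
// Worked example (illustrative only) for the 32-bit hard-float Java calling
// convention above: a signature of (int, int, long, double) gets
// R0, R1, R2:R3 and D0. Once registers are exhausted, arguments go to 32-bit
// stack slots, and longs/doubles stay 8-byte aligned regardless of
// ALIGN_WIDE_ARGUMENTS; the returned slot count is rounded up to be even.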
 692 
 693 static void patch_callers_callsite(MacroAssembler *masm) {
 694   Label skip;
 695 
 696   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 697   __ cbz(Rtemp, skip);
 698 
 699 #ifdef AARCH64
 700   push_param_registers(masm, FPR_PARAMS);
 701   __ raw_push(LR, ZR);
 702 #else
  // Push an even number of registers for stack alignment.
  // R9 is chosen because it had to be saved anyway on some platforms.
 705   __ push(RegisterSet(R0, R3) | R9 | LR);
 706 #endif // AARCH64
 707 
 708   __ mov(R0, Rmethod);
 709   __ mov(R1, LR);
 710   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 711 
 712 #ifdef AARCH64
 713   __ raw_pop(LR, ZR);
 714   pop_param_registers(masm, FPR_PARAMS);
 715 #else
 716   __ pop(RegisterSet(R0, R3) | R9 | LR);
 717 #endif // AARCH64
 718 
 719   __ bind(skip);
 720 }
 721 
 722 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 723                                     int total_args_passed, int comp_args_on_stack,
 724                                     const BasicType *sig_bt, const VMRegPair *regs) {
  // TODO: ARM - maybe we can use ldm to load arguments
 726   const Register tmp = Rtemp; // avoid erasing R5_mh
 727 
  // The next assert may not be needed, but it is safer. Extra analysis is required
  // if there are not enough free registers and we need to use R5 here.
 730   assert_different_registers(tmp, R5_mh);
 731 
  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race through here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
 741   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 742   __ str(Rmethod, callee_target_addr);
 743 
 744 #ifdef AARCH64
 745 
 746   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
 747   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
 748 
 749   if (comp_args_on_stack) {
 750     __ sub_slow(SP, SP, round_to(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
 751   }
 752 
 753   for (int i = 0; i < total_args_passed; i++) {
 754     if (sig_bt[i] == T_VOID) {
 755       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 756       continue;
 757     }
 758     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 759 
 760     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 761     Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 762 
 763     VMReg r = regs[i].first();
 764     bool full_word = regs[i].second()->is_valid();
 765 
 766     if (r->is_stack()) {
 767       if (full_word) {
 768         __ ldr(tmp, source_addr);
 769         __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 770       } else {
 771         __ ldr_w(tmp, source_addr);
 772         __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 773       }
 774     } else if (r->is_Register()) {
 775       if (full_word) {
 776         __ ldr(r->as_Register(), source_addr);
 777       } else {
 778         __ ldr_w(r->as_Register(), source_addr);
 779       }
 780     } else if (r->is_FloatRegister()) {
 781       if (sig_bt[i] == T_DOUBLE) {
 782         __ ldr_d(r->as_FloatRegister(), source_addr);
 783       } else {
 784         __ ldr_s(r->as_FloatRegister(), source_addr);
 785       }
 786     } else {
 787       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 788     }
 789   }
 790 
 791   __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
 792   __ br(tmp);
 793 
 794 #else
 795 
 796   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 797 
 798   const Register initial_sp = Rmethod; // temporarily scratched
 799 
  // The old code modified R4, but that looks unsafe (particularly with JSR292)
 801   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 802 
 803   __ mov(initial_sp, SP);
 804 
 805   if (comp_args_on_stack) {
 806     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 807   }
 808   __ bic(SP, SP, StackAlignmentInBytes - 1);
 809 
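  // Copy each Java argument from the interpreter's expression stack
  // (addressed through initial_sp, which still holds the pre-adjustment SP)
  // into its compiled-convention location: either a register or an outgoing
  // SP-relative stack slot. Longs and doubles occupy two interpreter stack
  // elements and are copied as two words.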
 810   for (int i = 0; i < total_args_passed; i++) {
 811     if (sig_bt[i] == T_VOID) {
 812       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 813       continue;
 814     }
 815     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 816     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 817 
 818     VMReg r_1 = regs[i].first();
 819     VMReg r_2 = regs[i].second();
 820     if (r_1->is_stack()) {
 821       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 822       if (!r_2->is_valid()) {
 823         __ ldr(tmp, Address(initial_sp, arg_offset));
 824         __ str(tmp, Address(SP, stack_offset));
 825       } else {
 826         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 827         __ str(tmp, Address(SP, stack_offset));
 828         __ ldr(tmp, Address(initial_sp, arg_offset));
 829         __ str(tmp, Address(SP, stack_offset + wordSize));
 830       }
 831     } else if (r_1->is_Register()) {
 832       if (!r_2->is_valid()) {
 833         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 834       } else {
 835         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 836         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 837       }
 838     } else if (r_1->is_FloatRegister()) {
 839 #ifdef __SOFTFP__
 840       ShouldNotReachHere();
 841 #endif // __SOFTFP__
 842       if (!r_2->is_valid()) {
 843         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 844       } else {
 845         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 846       }
 847     } else {
 848       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 849     }
 850   }
 851 
 852   // restore Rmethod (scratched for initial_sp)
 853   __ ldr(Rmethod, callee_target_addr);
 854   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 855 
 856 #endif // AARCH64
 857 }
 858 
 859 static void gen_c2i_adapter(MacroAssembler *masm,
 860                             int total_args_passed,  int comp_args_on_stack,
 861                             const BasicType *sig_bt, const VMRegPair *regs,
 862                             Label& skip_fixup) {
  // TODO: ARM - maybe we can use stm to deoptimize arguments
 864   const Register tmp = Rtemp;
 865 
 866   patch_callers_callsite(masm);
 867   __ bind(skip_fixup);
 868 
 869   __ mov(Rsender_sp, SP); // not yet saved
 870 
 871 #ifdef AARCH64
 872 
 873   int extraspace = round_to(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
 874   if (extraspace) {
 875     __ sub(SP, SP, extraspace);
 876   }
 877 
 878   for (int i = 0; i < total_args_passed; i++) {
 879     if (sig_bt[i] == T_VOID) {
 880       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 881       continue;
 882     }
 883 
 884     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 885     Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 886 
 887     VMReg r = regs[i].first();
 888     bool full_word = regs[i].second()->is_valid();
 889 
 890     if (r->is_stack()) {
 891       if (full_word) {
 892         __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 893         __ str(tmp, dest_addr);
 894       } else {
 895         __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 896         __ str_w(tmp, dest_addr);
 897       }
 898     } else if (r->is_Register()) {
 899       if (full_word) {
 900         __ str(r->as_Register(), dest_addr);
 901       } else {
 902         __ str_w(r->as_Register(), dest_addr);
 903       }
 904     } else if (r->is_FloatRegister()) {
 905       if (sig_bt[i] == T_DOUBLE) {
 906         __ str_d(r->as_FloatRegister(), dest_addr);
 907       } else {
 908         __ str_s(r->as_FloatRegister(), dest_addr);
 909       }
 910     } else {
 911       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 912     }
 913   }
 914 
 915   __ mov(Rparams, SP);
 916 
 917   __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
 918   __ br(tmp);
 919 
 920 #else
 921 
 922   int extraspace = total_args_passed * Interpreter::stackElementSize;
 923   if (extraspace) {
 924     __ sub_slow(SP, SP, extraspace);
 925   }
 926 
 927   for (int i = 0; i < total_args_passed; i++) {
 928     if (sig_bt[i] == T_VOID) {
 929       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 930       continue;
 931     }
 932     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 933 
 934     VMReg r_1 = regs[i].first();
 935     VMReg r_2 = regs[i].second();
 936     if (r_1->is_stack()) {
 937       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 938       if (!r_2->is_valid()) {
 939         __ ldr(tmp, Address(SP, arg_offset));
 940         __ str(tmp, Address(SP, stack_offset));
 941       } else {
 942         __ ldr(tmp, Address(SP, arg_offset));
 943         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 944         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 945         __ str(tmp, Address(SP, stack_offset));
 946       }
 947     } else if (r_1->is_Register()) {
 948       if (!r_2->is_valid()) {
 949         __ str(r_1->as_Register(), Address(SP, stack_offset));
 950       } else {
 951         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 952         __ str(r_2->as_Register(), Address(SP, stack_offset));
 953       }
 954     } else if (r_1->is_FloatRegister()) {
 955 #ifdef __SOFTFP__
 956       ShouldNotReachHere();
 957 #endif // __SOFTFP__
 958       if (!r_2->is_valid()) {
 959         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 960       } else {
 961         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 962       }
 963     } else {
 964       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 965     }
 966   }
 967 
 968   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 969 
 970 #endif // AARCH64
 971 }
 972 
 973 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 974                                                             int total_args_passed,
 975                                                             int comp_args_on_stack,
 976                                                             const BasicType *sig_bt,
 977                                                             const VMRegPair *regs,
 978                                                             AdapterFingerPrint* fingerprint) {
 979   address i2c_entry = __ pc();
 980   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 981 
 982   address c2i_unverified_entry = __ pc();
 983   Label skip_fixup;
 984   const Register receiver       = R0;
 985   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 986   const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
 987 
 988   __ load_klass(receiver_klass, receiver);
 989   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 990   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset()));
 991   __ cmp(receiver_klass, holder_klass);
 992 
 993 #ifdef AARCH64
 994   Label ic_miss;
 995   __ b(ic_miss, ne);
 996   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 997   __ cbz(Rtemp, skip_fixup);
 998   __ bind(ic_miss);
 999   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1000 #else
1001   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
1002   __ cmp(Rtemp, 0, eq);
1003   __ b(skip_fixup, eq);
1004   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
1005 #endif // AARCH64
1006 
1007   address c2i_entry = __ pc();
1008   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1009 
1010   __ flush();
1011   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1012 }
1013 
1014 
1015 static int reg2offset_in(VMReg r) {
1016   // Account for saved FP and LR
1017   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1018 }
1019 
1020 static int reg2offset_out(VMReg r) {
1021   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1022 }
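
// Example (illustrative, 32-bit ARM with 4-byte stack slots and wordSize == 4):
// incoming stack slot 0 maps to [FP + 8], just above the saved FP/LR pair,
// while outgoing slot 0 maps to [SP + 0] when out_preserve_stack_slots() is 0.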
1023 
1024 
1025 static void verify_oop_args(MacroAssembler* masm,
1026                             methodHandle method,
1027                             const BasicType* sig_bt,
1028                             const VMRegPair* regs) {
1029   Register temp_reg = Rmethod;  // not part of any compiled calling seq
1030   if (VerifyOops) {
1031     for (int i = 0; i < method->size_of_parameters(); i++) {
1032       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
1033         VMReg r = regs[i].first();
1034         assert(r->is_valid(), "bad oop arg");
1035         if (r->is_stack()) {
1036           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1037           __ verify_oop(temp_reg);
1038         } else {
1039           __ verify_oop(r->as_Register());
1040         }
1041       }
1042     }
1043   }
1044 }
1045 
1046 static void gen_special_dispatch(MacroAssembler* masm,
1047                                  methodHandle method,
1048                                  const BasicType* sig_bt,
1049                                  const VMRegPair* regs) {
1050   verify_oop_args(masm, method, sig_bt, regs);
1051   vmIntrinsics::ID iid = method->intrinsic_id();
1052 
1053   // Now write the args into the outgoing interpreter space
1054   bool     has_receiver   = false;
1055   Register receiver_reg   = noreg;
1056   int      member_arg_pos = -1;
1057   Register member_reg     = noreg;
1058   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1059   if (ref_kind != 0) {
1060     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1061     member_reg = Rmethod;  // known to be free at this point
1062     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1063   } else if (iid == vmIntrinsics::_invokeBasic) {
1064     has_receiver = true;
1065   } else {
1066     fatal("unexpected intrinsic id %d", iid);
1067   }
1068 
1069   if (member_reg != noreg) {
1070     // Load the member_arg into register, if necessary.
1071     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1072     VMReg r = regs[member_arg_pos].first();
1073     if (r->is_stack()) {
1074       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1075     } else {
1076       // no data motion is needed
1077       member_reg = r->as_Register();
1078     }
1079   }
1080 
1081   if (has_receiver) {
1082     // Make sure the receiver is loaded into a register.
1083     assert(method->size_of_parameters() > 0, "oob");
1084     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1085     VMReg r = regs[0].first();
1086     assert(r->is_valid(), "bad receiver arg");
1087     if (r->is_stack()) {
1088       // Porting note:  This assumes that compiled calling conventions always
1089       // pass the receiver oop in a register.  If this is not true on some
1090       // platform, pick a temp and load the receiver from stack.
1091       assert(false, "receiver always in a register");
1092       receiver_reg = j_rarg0;  // known to be free at this point
1093       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1094     } else {
1095       // no data motion is needed
1096       receiver_reg = r->as_Register();
1097     }
1098   }
1099 
1100   // Figure out which address we are really jumping to:
1101   MethodHandles::generate_method_handle_dispatch(masm, iid,
1102                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1103 }
1104 
1105 // ---------------------------------------------------------------------------
1106 // Generate a native wrapper for a given method.  The method takes arguments
1107 // in the Java compiled code convention, marshals them to the native
1108 // convention (handlizes oops, etc), transitions to native, makes the call,
1109 // returns to java state (possibly blocking), unhandlizes any result and
1110 // returns.
1111 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1112                                                 const methodHandle& method,
1113                                                 int compile_id,
1114                                                 BasicType* in_sig_bt,
1115                                                 VMRegPair* in_regs,
1116                                                 BasicType ret_type) {
1117   if (method->is_method_handle_intrinsic()) {
1118     vmIntrinsics::ID iid = method->intrinsic_id();
1119     intptr_t start = (intptr_t)__ pc();
1120     int vep_offset = ((intptr_t)__ pc()) - start;
1121     gen_special_dispatch(masm,
1122                          method,
1123                          in_sig_bt,
1124                          in_regs);
1125     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1126     __ flush();
1127     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1128     return nmethod::new_native_nmethod(method,
1129                                        compile_id,
1130                                        masm->code(),
1131                                        vep_offset,
1132                                        frame_complete,
1133                                        stack_slots / VMRegImpl::slots_per_word,
1134                                        in_ByteSize(-1),
1135                                        in_ByteSize(-1),
1136                                        (OopMapSet*)NULL);
1137   }
1138   // Arguments for JNI method include JNIEnv and Class if static
1139 
  // Usage of Rtemp should be OK since it is scratched by the native call
1141 
1142   bool is_static = method->is_static();
1143 
1144   const int total_in_args = method->size_of_parameters();
1145   int total_c_args = total_in_args + 1;
1146   if (is_static) {
1147     total_c_args++;
1148   }
1149 
1150   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1151   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1152 
1153   int argc = 0;
1154   out_sig_bt[argc++] = T_ADDRESS;
1155   if (is_static) {
1156     out_sig_bt[argc++] = T_OBJECT;
1157   }
1158 
1159   int i;
1160   for (i = 0; i < total_in_args; i++) {
1161     out_sig_bt[argc++] = in_sig_bt[i];
1162   }
1163 
1164   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1165   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
  // Since object arguments need to be wrapped, we must preserve space
  // for those object arguments which come in registers (GPR_PARAMS maximum),
  // plus one more slot for the Klass handle (for static methods).
1169   int oop_handle_offset = stack_slots;
1170   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
1171 
1172   // Plus a lock if needed
1173   int lock_slot_offset = 0;
1174   if (method->is_synchronized()) {
1175     lock_slot_offset = stack_slots;
1176     assert(sizeof(BasicLock) == wordSize, "adjust this code");
1177     stack_slots += VMRegImpl::slots_per_word;
1178   }
1179 
1180   // Space to save return address and FP
1181   stack_slots += 2 * VMRegImpl::slots_per_word;
1182 
1183   // Calculate the final stack size taking account of alignment
1184   stack_slots = round_to(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
1185   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1186   int lock_slot_fp_offset = stack_size - 2 * wordSize -
1187     lock_slot_offset * VMRegImpl::stack_slot_size;
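
  // Resulting wrapper frame, sketched in the usual high-to-low order
  // (padding introduced by the final rounding is not shown precisely):
  //
  //   |------------------------------|
  //   | saved LR (return address)    |
  //   | saved FP                     | <-- FP after frame setup
  //   | BasicLock (if synchronized)  | <-- lock_slot_offset
  //   | oop handle area + Klass slot | <-- oop_handle_offset
  //   | outgoing C argument slots    |
  //   |------------------------------| <-- SP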
1188 
1189   // Unverified entry point
1190   address start = __ pc();
1191 
1192   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1193   const Register receiver = R0; // see receiverOpr()
1194   __ load_klass(Rtemp, receiver);
1195   __ cmp(Rtemp, Ricklass);
1196   Label verified;
1197 
1198   __ b(verified, eq); // jump over alignment no-ops too
1199   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1200   __ align(CodeEntryAlignment);
1201 
1202   // Verified entry point
1203   __ bind(verified);
1204   int vep_offset = __ pc() - start;
1205 
1206 #ifdef AARCH64
1207   // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1208   __ nop();
1209 #endif // AARCH64
1210 
1211   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
    // Object.hashCode and System.identityHashCode can pull the hashCode from
    // the header word instead of doing a full VM transition once it's been computed.
1214     Label slow_case;
1215     const Register obj_reg = R0;
1216 
    // Unlike Object.hashCode, System.identityHashCode is a static method and
    // gets the object as an argument instead of as the receiver.
1219     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1220       assert(method->is_static(), "method should be static");
1221       // return 0 for null reference input, return val = R0 = obj_reg = 0
1222 #ifdef AARCH64
1223       Label Continue;
1224       __ cbnz(obj_reg, Continue);
1225       __ ret();
1226       __ bind(Continue);
1227 #else
1228       __ cmp(obj_reg, 0);
1229       __ bx(LR, eq);
1230 #endif
1231     }
1232 
1233     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1234 
1235     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1236     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1237 
1238     if (UseBiasedLocking) {
1239       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1240       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1241     }
1242 
1243 #ifdef AARCH64
1244     __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1245     __ b(slow_case, eq);
1246     __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1247     __ ret();
1248 #else
1249     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1250     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1251     __ bx(LR, ne);
1252 #endif // AARCH64
1253 
1254     __ bind(slow_case);
1255   }
1256 
1257   // Bang stack pages
1258   __ arm_stack_overflow_check(stack_size, Rtemp);
1259 
1260   // Setup frame linkage
1261   __ raw_push(FP, LR);
1262   __ mov(FP, SP);
1263   __ sub_slow(SP, SP, stack_size - 2*wordSize);
1264 
1265   int frame_complete = __ pc() - start;
1266 
1267   OopMapSet* oop_maps = new OopMapSet();
1268   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1269   const int extra_args = is_static ? 2 : 1;
1270   int receiver_offset = -1;
1271   int fp_regs_in_arguments = 0;
1272 
1273   for (i = total_in_args; --i >= 0; ) {
1274     switch (in_sig_bt[i]) {
1275     case T_ARRAY:
1276     case T_OBJECT: {
1277       VMReg src = in_regs[i].first();
1278       VMReg dst = out_regs[i + extra_args].first();
1279       if (src->is_stack()) {
1280         assert(dst->is_stack(), "must be");
1281         assert(i != 0, "Incoming receiver is always in a register");
1282         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1283         __ cmp(Rtemp, 0);
1284 #ifdef AARCH64
1285         __ add(Rtemp, FP, reg2offset_in(src));
1286         __ csel(Rtemp, ZR, Rtemp, eq);
1287 #else
1288         __ add(Rtemp, FP, reg2offset_in(src), ne);
1289 #endif // AARCH64
1290         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1291         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1292         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1293       } else {
1294         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1295         __ str(src->as_Register(), Address(SP, offset));
1296         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1297         if ((i == 0) && (!is_static)) {
1298           receiver_offset = offset;
1299         }
1300         oop_handle_offset += VMRegImpl::slots_per_word;
1301 
1302 #ifdef AARCH64
1303         __ cmp(src->as_Register(), 0);
1304         __ add(Rtemp, SP, offset);
1305         __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1306         if (dst->is_stack()) {
1307           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1308         }
1309 #else
1310         if (dst->is_stack()) {
1311           __ movs(Rtemp, src->as_Register());
1312           __ add(Rtemp, SP, offset, ne);
1313           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1314         } else {
1315           __ movs(dst->as_Register(), src->as_Register());
1316           __ add(dst->as_Register(), SP, offset, ne);
1317         }
1318 #endif // AARCH64
1319       }
1320     }
1321 
1322     case T_VOID:
1323       break;
1324 
1325 #ifdef AARCH64
1326     case T_FLOAT:
1327     case T_DOUBLE: {
1328       VMReg src = in_regs[i].first();
1329       VMReg dst = out_regs[i + extra_args].first();
1330       if (src->is_stack()) {
1331         assert(dst->is_stack(), "must be");
1332         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1333         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1334       } else {
1335         assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1336         assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1337         fp_regs_in_arguments++;
1338       }
1339       break;
1340     }
1341 #else // AARCH64
1342 
1343 #ifdef __SOFTFP__
1344     case T_DOUBLE:
1345 #endif
1346     case T_LONG: {
1347       VMReg src_1 = in_regs[i].first();
1348       VMReg src_2 = in_regs[i].second();
1349       VMReg dst_1 = out_regs[i + extra_args].first();
1350       VMReg dst_2 = out_regs[i + extra_args].second();
1351 #if (ALIGN_WIDE_ARGUMENTS == 0)
      // The C convention can mix a register and a stack slot for a
      // 64-bit native argument.
1354 
1355       // Note: following code should work independently of whether
1356       // the Java calling convention follows C convention or whether
1357       // it aligns 64-bit values.
1358       if (dst_2->is_Register()) {
1359         if (src_1->as_Register() != dst_1->as_Register()) {
1360           assert(src_1->as_Register() != dst_2->as_Register() &&
1361                  src_2->as_Register() != dst_2->as_Register(), "must be");
1362           __ mov(dst_2->as_Register(), src_2->as_Register());
1363           __ mov(dst_1->as_Register(), src_1->as_Register());
1364         } else {
1365           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1366         }
1367       } else if (src_2->is_Register()) {
1368         if (dst_1->is_Register()) {
1369           // dst mixes a register and a stack slot
1370           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1371           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
1372           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1373           __ mov(dst_1->as_Register(), src_1->as_Register());
1374         } else {
1375           // registers to stack slots
1376           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1377           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1378           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1379         }
1380       } else if (src_1->is_Register()) {
1381         if (dst_1->is_Register()) {
1382           // src and dst must be R3 + stack slot
1383           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
1384           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
1385           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
1386         } else {
1387           // <R3,stack> -> <stack,stack>
1388           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
1389           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1390           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1391           __ str(LR, Address(SP, reg2offset_out(dst_2)));
1392         }
1393       } else {
1394         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1395         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1396         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1397         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1398         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1399       }
1400 #else // ALIGN_WIDE_ARGUMENTS
1401       if (src_1->is_stack()) {
1402         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1403         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1404         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1405         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1406         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1407       } else if (dst_1->is_stack()) {
1408         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1409         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1410         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1411       } else if (src_1->as_Register() == dst_1->as_Register()) {
1412         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1413       } else {
1414         assert(src_1->as_Register() != dst_2->as_Register() &&
1415                src_2->as_Register() != dst_2->as_Register(), "must be");
1416         __ mov(dst_2->as_Register(), src_2->as_Register());
1417         __ mov(dst_1->as_Register(), src_1->as_Register());
1418       }
1419 #endif // ALIGN_WIDE_ARGUMENTS
1420       break;
1421     }
1422 
1423 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1424     case T_FLOAT: {
1425       VMReg src = in_regs[i].first();
1426       VMReg dst = out_regs[i + extra_args].first();
1427       if (src->is_stack()) {
1428         assert(dst->is_stack(), "must be");
1429         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1430         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1431       } else if (dst->is_stack()) {
1432         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1433       } else {
1434         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1435         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1436       }
1437       break;
1438     }
1439 
1440     case T_DOUBLE: {
1441       VMReg src_1 = in_regs[i].first();
1442       VMReg src_2 = in_regs[i].second();
1443       VMReg dst_1 = out_regs[i + extra_args].first();
1444       VMReg dst_2 = out_regs[i + extra_args].second();
1445       if (src_1->is_stack()) {
1446         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1447         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1448         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1449         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1450         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1451       } else if (dst_1->is_stack()) {
1452         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1453         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1454 #if (ALIGN_WIDE_ARGUMENTS == 0)
1455       } else if (dst_2->is_stack()) {
1456         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1457         // double register must go into R3 + one stack slot
1458         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1459         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1460 #endif
1461       } else {
1462         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1463         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1464       }
1465       break;
1466     }
1467 #endif // !__SOFTFP__ && !__ABI_HARD__
1468 
1469 #ifdef __ABI_HARD__
1470     case T_FLOAT: {
1471       VMReg src = in_regs[i].first();
1472       VMReg dst = out_regs[i + extra_args].first();
1473       if (src->is_stack()) {
1474         if (dst->is_stack()) {
1475           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1476           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1477         } else {
1478           // C2 Java calling convention does not populate S14 and S15, therefore
1479           // those need to be loaded from stack here
1480           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1481           fp_regs_in_arguments++;
1482         }
1483       } else {
1484         assert(src->is_FloatRegister(), "must be");
1485         fp_regs_in_arguments++;
1486       }
1487       break;
1488     }
1489     case T_DOUBLE: {
1490       VMReg src_1 = in_regs[i].first();
1491       VMReg src_2 = in_regs[i].second();
1492       VMReg dst_1 = out_regs[i + extra_args].first();
1493       VMReg dst_2 = out_regs[i + extra_args].second();
1494       if (src_1->is_stack()) {
1495         if (dst_1->is_stack()) {
1496           assert(dst_2->is_stack(), "must be");
1497           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1498           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1499           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1500           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1501         } else {
1502           // C2 Java calling convention does not populate S14 and S15, therefore
1503           // those need to be loaded from stack here
1504           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1505           fp_regs_in_arguments += 2;
1506         }
1507       } else {
1508         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1509         fp_regs_in_arguments += 2;
1510       }
1511       break;
1512     }
1513 #endif // __ABI_HARD__
1514 #endif // AARCH64
1515 
1516     default: {
1517       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1518       VMReg src = in_regs[i].first();
1519       VMReg dst = out_regs[i + extra_args].first();
1520       if (src->is_stack()) {
1521         assert(dst->is_stack(), "must be");
1522         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1523         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1524       } else if (dst->is_stack()) {
1525         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1526       } else {
1527         assert(src->is_Register() && dst->is_Register(), "must be");
1528         __ mov(dst->as_Register(), src->as_Register());
1529       }
1530     }
1531     }
1532   }
1533 
1534   // Get Klass mirror
1535   int klass_offset = -1;
1536   if (is_static) {
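     // A static native method receives its class mirror as the second JNI argument.
     // Store the mirror oop into a stack slot, pass the address of that slot (a
     // handle) in c_rarg1, and record the slot in the oop map so GC can find it.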
1537     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1538     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1539     __ add(c_rarg1, SP, klass_offset);
1540     __ str(Rtemp, Address(SP, klass_offset));
1541     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1542   }
1543 
1544   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1545   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1546   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1547   oop_maps->add_gc_map(pc_offset, map);
1548 
1549 #ifndef AARCH64
1550   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1551   __ membar(MacroAssembler::StoreStore, Rtemp);
1552 #endif // !AARCH64
1553 
1554   // RedefineClasses() tracing support for obsolete method entry
1555   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1556 #ifdef AARCH64
1557     __ NOT_TESTED();
1558 #endif
1559     __ save_caller_save_registers();
1560     __ mov(R0, Rthread);
1561     __ mov_metadata(R1, method());
1562     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1563     __ restore_caller_save_registers();
1564   }
1565 
1566   const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1567   const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1568   const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1569   const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
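   // Note: the registers above are callee-saved in the C ABI, so their values
   // survive the call to the native function and the runtime calls below.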
1570 
1571   Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
1572   if (method->is_synchronized()) {
1573     // The first argument is a handle to sync object (a class or an instance)
1574     __ ldr(sync_obj, Address(R1));
1575     // Remember the handle for the unlocking code
1576     __ mov(sync_handle, R1);
1577 
1578     if (UseBiasedLocking) {
1579       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1580     }
1581 
1582     const Register mark = tmp;
1583 #ifdef AARCH64
1584     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1585     assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1586 
1587     __ ldr(mark, sync_obj);
1588 
1589     // Test if object is already locked
1590     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1591     __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1592 
1593     // Check for recursive lock
1594     // See comments in InterpreterMacroAssembler::lock_object for
1595     // explanations on the fast recursive locking check.
1596     __ mov(Rtemp, SP);
1597     __ sub(Rtemp, mark, Rtemp);
1598     intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1599     Assembler::LogicalImmediate imm(mask, false);
1600     __ ands(Rtemp, Rtemp, imm);
1601     __ b(slow_lock, ne);
1602 
1603     // Recursive locking: store 0 into a lock record
1604     __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1605     __ b(lock_done);
1606 
1607     __ bind(fast_lock);
1608     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1609 
1610     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1611 #else
1612     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1613     // That is acceptable: either the CAS or the slow-case path is taken in that case.
1614 
1615     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1616     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1617     __ tst(mark, markOopDesc::unlocked_value);
1618     __ b(fast_lock, ne);
1619 
1620     // Check for recursive lock
1621     // See comments in InterpreterMacroAssembler::lock_object for
1622     // explanations on the fast recursive locking check.
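     // At this point the object is locked. The lock is a recursive stack lock iff the
     // low two bits of the mark are zero (thin lock) and the mark points into the
     // current thread's stack within a page above SP; otherwise take the slow path.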
1623     // Check independently the low bits and the distance to SP
1624     // -1- test low 2 bits
1625     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1626     // -2- test (hdr - SP) if the low two bits are 0
1627     __ sub(Rtemp, mark, SP, eq);
1628     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1629     // If still 'eq' then recursive locking OK: set displaced header to 0
1630     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1631     __ b(lock_done, eq);
1632     __ b(slow_lock);
1633 
1634     __ bind(fast_lock);
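     // The object appears unlocked: save its mark word as the displaced header in the
     // lock record, then try to atomically install a pointer to the lock record into
     // the object's mark word; on failure take the slow path.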
1635     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1636 
1637     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1638 #endif // AARCH64
1639 
1640     __ bind(lock_done);
1641   }
1642 
1643   // Get JNIEnv*
1644   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1645 
1646   // Perform thread state transition
1647   __ mov(Rtemp, _thread_in_native);
1648 #ifdef AARCH64
1649   // The stlr instruction is used to force all preceding writes to be observed prior to the thread state change
1650   __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1651   __ stlr_w(Rtemp, Rtemp2);
1652 #else
1653   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1654 #endif // AARCH64
1655 
1656   // Finally, call the native method
1657   __ call(method->native_function());
1658 
1659   // Set FPSCR/FPCR to a known state
1660   if (AlwaysRestoreFPU) {
1661     __ restore_default_fp_mode();
1662   }
1663 
1664   // Do a safepoint check while the thread is in the transition state
1665   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1666   Label call_safepoint_runtime, return_to_java;
1667   __ mov(Rtemp, _thread_in_native_trans);
1668   __ ldr_literal(R2, safepoint_state);
1669   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1670 
1671   // Make sure the store is observed before reading the SafepointSynchronize state and further memory references
1672   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1673 
1674   __ ldr_s32(R2, Address(R2));
1675   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1676   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1677   __ cond_cmp(R3, 0, eq);
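   // 'ne' at this point means a safepoint is in progress or this thread has a
   // pending suspend request, so call into the runtime to handle it.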
1678   __ b(call_safepoint_runtime, ne);
1679   __ bind(return_to_java);
1680 
1681   // Perform thread state transition and reguard stack yellow pages if needed
1682   Label reguard, reguard_done;
1683   __ mov(Rtemp, _thread_in_Java);
1684   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1685   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1686 
1687   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1688   __ b(reguard, eq);
1689   __ bind(reguard_done);
1690 
1691   Label slow_unlock, unlock_done, retry;
1692   if (method->is_synchronized()) {
1693     __ ldr(sync_obj, Address(sync_handle));
1694 
1695     if (UseBiasedLocking) {
1696       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1697       // disp_hdr may not have been saved on entry with biased locking
1698       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1699     }
1700 
1701     // See C1_MacroAssembler::unlock_object() for more comments
1702     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1703     __ cbz(R2, unlock_done);
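     // A zero displaced header means the lock was taken recursively: nothing to undo.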
1704 
1705     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1706 
1707     __ bind(unlock_done);
1708   }
1709 
1710   // Reset the last Java frame and zero the JNI handle block top
1711   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1712   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1713 
1714 #ifdef AARCH64
1715   __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1716   if (CheckJNICalls) {
1717     __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1718   }
1719 
1720 
1721   switch (ret_type) {
1722   case T_BOOLEAN:
1723     __ tst(R0, 0xff);
1724     __ cset(R0, ne);
1725     break;
1726   case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1727   case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1728   case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1729   case T_INT    : // fall through
1730   case T_LONG   : // fall through
1731   case T_VOID   : // fall through
1732   case T_FLOAT  : // fall through
1733   case T_DOUBLE : /* nothing to do */          break;
1734   case T_OBJECT : // fall through
1735   case T_ARRAY  : {
1736     Label L;
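     // R0 holds the JNI handle returned by the native method; dereference it to
     // get the oop. A NULL handle yields a NULL result.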
1737     __ cbz(R0, L);
1738     __ ldr(R0, Address(R0));
1739     __ verify_oop(R0);
1740     __ bind(L);
1741     break;
1742   }
1743   default:
1744     ShouldNotReachHere();
1745   }
1746 #else
1747   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1748   if (CheckJNICalls) {
1749     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1750   }
1751 
1752   // Unhandle the result: the native method returns a JNI handle; dereference it unless it is NULL
1753   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1754     __ cmp(R0, 0);
1755     __ ldr(R0, Address(R0), ne);
1756   }
1757 #endif // AARCH64
1758 
1759   // Any exception pending?
1760   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1761   __ mov(SP, FP);
1762 
1763 #ifdef AARCH64
1764   Label except;
1765   __ cbnz(Rtemp, except);
1766   __ raw_pop(FP, LR);
1767   __ ret();
1768 
1769   __ bind(except);
1770   // Pop the frame and forward the exception. Rexception_pc contains return address.
1771   __ raw_pop(FP, Rexception_pc);
1772 #else
1773   __ cmp(Rtemp, 0);
1774   // Pop the frame and return if no exception pending
1775   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1776   // Pop the frame and forward the exception. Rexception_pc contains return address.
1777   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1778   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1779 #endif // AARCH64
1780   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1781 
1782   // Safepoint operation and/or pending suspend request is in progress.
1783   // Save the return values and call the runtime function by hand.
1784   __ bind(call_safepoint_runtime);
1785   push_result_registers(masm, ret_type);
1786   __ mov(R0, Rthread);
1787   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1788   pop_result_registers(masm, ret_type);
1789   __ b(return_to_java);
1790 
1791   __ bind_literal(safepoint_state);
1792 
1793   // Reguard stack pages. Save native results around a call to C runtime.
1794   __ bind(reguard);
1795   push_result_registers(masm, ret_type);
1796   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1797   pop_result_registers(masm, ret_type);
1798   __ b(reguard_done);
1799 
1800   if (method->is_synchronized()) {
1801     // Locking slow case
1802     if (UseBiasedLocking) {
1803       __ bind(slow_lock_biased);
1804       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1805     }
1806 
1807     __ bind(slow_lock);
1808 
1809     push_param_registers(masm, fp_regs_in_arguments);
1810 
1811     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1812     __ mov(R0, sync_obj);
1813     __ mov(R1, disp_hdr);
1814     __ mov(R2, Rthread);
1815     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1816 
1817     pop_param_registers(masm, fp_regs_in_arguments);
1818 
1819     __ b(lock_done);
1820 
1821     // Unlocking slow case
1822     __ bind(slow_unlock);
1823 
1824     push_result_registers(masm, ret_type);
1825 
1826     // Clear the pending exception before reentering the VM.
1827     // The exception oop can be kept in a register since this is a leaf call.
1828     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1829     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1830     Register zero = __ zero_register(Rtemp);
1831     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1832     __ mov(R0, sync_obj);
1833     __ mov(R1, disp_hdr);
1834     __ mov(R2, Rthread);
1835     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1836     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1837 
1838     pop_result_registers(masm, ret_type);
1839 
1840     __ b(unlock_done);
1841   }
1842 
1843   __ flush();
1844   return nmethod::new_native_nmethod(method,
1845                                      compile_id,
1846                                      masm->code(),
1847                                      vep_offset,
1848                                      frame_complete,
1849                                      stack_slots / VMRegImpl::slots_per_word,
1850                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1851                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1852                                      oop_maps);
1853 }
1854 
1855 // This function returns the adjustment size (in number of words) to a c2i adapter
1856 // activation, for use during deoptimization.
1857 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1858   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1859 #ifdef AARCH64
1860   extra_locals_size = round_to(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1861 #endif // AARCH64
1862   return extra_locals_size;
1863 }
1864 
1865 
1866 uint SharedRuntime::out_preserve_stack_slots() {
1867   return 0;
1868 }
1869 
1870 
1871 //------------------------------generate_deopt_blob----------------------------
1872 void SharedRuntime::generate_deopt_blob() {
1873   ResourceMark rm;
1874 #ifdef AARCH64
1875   CodeBuffer buffer("deopt_blob", 1024+256, 1);
1876 #else
1877   CodeBuffer buffer("deopt_blob", 1024, 1024);
1878 #endif
1879   int frame_size_in_words;
1880   OopMapSet* oop_maps;
1881   int reexecute_offset;
1882   int exception_in_tls_offset;
1883   int exception_offset;
1884 
1885   MacroAssembler* masm = new MacroAssembler(&buffer);
1886   Label cont;
1887   const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1888   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1889   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1890   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1891 
1892   address start = __ pc();
1893 
1894   oop_maps = new OopMapSet();
1895   // LR saved by caller (can be live in c2 method)
1896 
1897   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1898   // not possible to call the deopt blob from the nmethod and pass the
1899   // address of the deopt handler of the nmethod in LR. What happens
1900   // now is that the caller of the deopt blob pushes the current
1901   // address so the deopt blob doesn't have to do it. This way LR can
1902 // be preserved: it contains the live value from the nmethod and is
1903   // saved at R14/R30_offset here.
1904   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1905   __ mov(Rkind, Deoptimization::Unpack_deopt);
1906   __ b(cont);
1907 
1908   exception_offset = __ pc() - start;
1909 
1910   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1911   // exception_in_tls_offset entry point.
1912   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1913   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1914   // Force return value to NULL to avoid confusing the escape analysis
1915   // logic. Everything is dead here anyway.
1916   __ mov(R0, 0);
1917 
1918   exception_in_tls_offset = __ pc() - start;
1919 
1920   // Exception data is in JavaThread structure
1921   // Patch the return address of the current frame
1922   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1923   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1924   {
1925     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1926     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1927   }
1928   __ mov(Rkind, Deoptimization::Unpack_exception);
1929   __ b(cont);
1930 
1931   reexecute_offset = __ pc() - start;
1932 
1933   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1934   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1935 
1936   // Calculate UnrollBlock and save the result in Rublock
1937   __ bind(cont);
1938   __ mov(R0, Rthread);
1939   __ mov(R1, Rkind);
1940 
1941   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1942   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1943   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1944   if (pc_offset == -1) {
1945     pc_offset = __ offset();
1946   }
1947   oop_maps->add_gc_map(pc_offset, map);
1948   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1949 
1950   __ mov(Rublock, R0);
1951 
1952   // Reload Rkind from the UnrollBlock (might have changed)
1953   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1954   Label noException;
1955   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1956   __ b(noException, ne);
1957   // handle exception case
1958 #ifdef ASSERT
1959   // assert that exception_pc is zero in tls
1960   { Label L;
1961     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1962     __ cbz(Rexception_pc, L);
1963     __ stop("exception pc should be null");
1964     __ bind(L);
1965   }
1966 #endif
1967   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1968   __ verify_oop(Rexception_obj);
1969   {
1970     const Register Rzero = __ zero_register(Rtemp);
1971     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1972   }
1973 
1974   __ bind(noException);
1975 
1976   // This frame is going away.  Fetch return value, so we can move it to
1977   // a new frame.
1978   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1979 #ifndef AARCH64
1980   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1981 #endif // !AARCH64
1982 #ifndef __SOFTFP__
1983   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1984 #endif
1985   // pop frame
1986   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1987 
1988   // Set initial stack state before pushing interpreter frames
1989   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1990   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1991   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
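   // Rtemp = size of the deoptimized frame being removed,
   // R2 = array of frame pcs, R3 = array of frame sizes (from the UnrollBlock).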
1992 
1993 #ifdef AARCH64
1994   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1995   // They are needed for correct stack walking during stack overflow handling.
1996   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1997   __ sub(Rtemp, Rtemp, 2*wordSize);
1998   __ add(SP, SP, Rtemp, ex_uxtx);
1999   __ raw_pop(FP, LR);
2000 
2001 #ifdef ASSERT
2002   { Label L;
2003     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2004     __ cmp(FP, Rtemp);
2005     __ b(L, eq);
2006     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2007     __ bind(L);
2008   }
2009   { Label L;
2010     __ ldr(Rtemp, Address(R2));
2011     __ cmp(LR, Rtemp);
2012     __ b(L, eq);
2013     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2014     __ bind(L);
2015   }
2016 #endif // ASSERT
2017 
2018 #else
2019   __ add(SP, SP, Rtemp);
2020 #endif // AARCH64
2021 
2022 #ifdef ASSERT
2023   // Compilers generate code that bangs the stack by as much as the
2024   // interpreter would need, so this stack banging should never
2025   // trigger a fault. Verify that it does not on non-product builds.
2026   // Check that there is enough stack to push the deoptimized frames.
2027   if (UseStackBanging) {
2028 #ifndef AARCH64
2029     // The compiled method that we are deoptimizing was popped from the stack.
2030     // If the stack bang results in a stack overflow, we don't return to the
2031     // method that is being deoptimized. The stack overflow exception is
2032     // propagated to the caller of the deoptimized method. Need to get the pc
2033     // from the caller in LR and restore FP.
2034     __ ldr(LR, Address(R2, 0));
2035     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2036 #endif // !AARCH64
2037     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2038     __ arm_stack_overflow_check(R8, Rtemp);
2039   }
2040 #endif
2041   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
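   // R8 = number of interpreter frames to push (loop counter below)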
2042 
2043 #ifndef AARCH64
2044   // Pick up the initial fp we should save
2045   // XXX Note: was ldr(FP, Address(FP));
2046 
2047   // The compiler no longer uses FP as a frame pointer for the
2048   // compiled code. It can be used by the allocator in C2 or to
2049   // memorize the original SP for JSR292 call sites.
2050 
2051   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
2052   // Deoptimization::fetch_unroll_info computes the right FP value and
2053   // stores it in Rublock.initial_info. This has been activated for ARM.
2054   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2055 #endif // !AARCH64
2056 
2057   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2058   __ mov(Rsender, SP);
2059 #ifdef AARCH64
2060   __ sub(SP, SP, Rtemp, ex_uxtx);
2061 #else
2062   __ sub(SP, SP, Rtemp);
2063 #endif // AARCH64
2064 
2065   // Push interpreter frames in a loop
2066   Label loop;
2067   __ bind(loop);
2068   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2069   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2070 
2071   __ raw_push(FP, LR);                                     // create new frame
2072   __ mov(FP, SP);
2073   __ sub(Rtemp, Rtemp, 2*wordSize);
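   // The recorded frame size includes the FP/LR pair that raw_push already allocated.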
2074 
2075 #ifdef AARCH64
2076   __ sub(SP, SP, Rtemp, ex_uxtx);
2077 #else
2078   __ sub(SP, SP, Rtemp);
2079 #endif // AARCH64
2080 
2081   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2082 #ifdef AARCH64
2083   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2084 #else
2085   __ mov(LR, 0);
2086   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2087 #endif // AARCH64
2088 
2089   __ subs(R8, R8, 1);                               // decrement counter
2090   __ mov(Rsender, SP);
2091   __ b(loop, ne);
2092 
2093   // Re-push self-frame
2094   __ ldr(LR, Address(R2));
2095   __ raw_push(FP, LR);
2096   __ mov(FP, SP);
2097   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
2098 
2099   // Restore frame locals after moving the frame
2100   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2101 #ifndef AARCH64
2102   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2103 #endif // !AARCH64
2104 
2105 #ifndef __SOFTFP__
2106   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2107 #endif // !__SOFTFP__
2108 
2109 #ifndef AARCH64
2110 #ifdef ASSERT
2111   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2112   { Label L;
2113     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2114     __ cmp_32(Rkind, Rtemp);
2115     __ b(L, eq);
2116     __ stop("Rkind was overwritten");
2117     __ bind(L);
2118   }
2119 #endif
2120 #endif
2121 
2122   // Call unpack_frames with proper arguments
2123   __ mov(R0, Rthread);
2124   __ mov(R1, Rkind);
2125 
2126   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2127   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2128   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2129   if (pc_offset == -1) {
2130     pc_offset = __ offset();
2131   }
2132   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2133   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2134 
2135   // Collect return values, pop self-frame and jump to interpreter
2136   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2137 #ifndef AARCH64
2138   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2139 #endif // !AARCH64
2140   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
2141   // float return value registers are controlled by __ABI_HARD__.
2142   // This matters for vfp-sflt builds.
2143 #ifndef __SOFTFP__
2144   // Interpreter hard float
2145 #ifdef __ABI_HARD__
2146   // Compiler float return value in FP registers
2147   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2148 #else
2149   // Compiler float return value in integer registers,
2150   // copy to D0 for interpreter (S0 <-- R0)
2151   __ fmdrr(D0_tos, R0, R1);
2152 #endif
2153 #endif // !__SOFTFP__
2154   __ mov(SP, FP);
2155 
2156 #ifdef AARCH64
2157   __ raw_pop(FP, LR);
2158   __ ret();
2159 #else
2160   __ pop(RegisterSet(FP) | RegisterSet(PC));
2161 #endif // AARCH64
2162 
2163   __ flush();
2164 
2165   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2166                                            reexecute_offset, frame_size_in_words);
2167   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2168 }
2169 
2170 #ifdef COMPILER2
2171 
2172 //------------------------------generate_uncommon_trap_blob--------------------
2173 // Ought to generate an ideal graph & compile, but here's some hand-written
2174 // ARM assembly instead.
2175 void SharedRuntime::generate_uncommon_trap_blob() {
2176   // allocate space for the code
2177   ResourceMark rm;
2178 
2179   // setup code generation tools
2180   int pad = VerifyThread ? 512 : 0;
2181 #ifdef _LP64
2182   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2183 #else
2184   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2185   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2186   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2187 #endif
2188   // bypassed when code generation is useless
2189   MacroAssembler* masm               = new MacroAssembler(&buffer);
2190   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2191   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2192   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2193 
2194   //
2195   // This is the entry point for all traps the compiler takes when it thinks
2196   // it cannot handle further execution of the compiled code. The frame is
2197   // deoptimized in these cases and converted into interpreter frames for
2198   // execution.
2199   // The steps taken by this frame are as follows:
2200   //   - push a fake "unpack_frame"
2201   //   - call the C routine Deoptimization::uncommon_trap (this function
2202   //     packs the current compiled frame into vframe arrays and returns
2203   //     information about the number and size of interpreter frames which
2204   //     are equivalent to the frame which is being deoptimized)
2205   //   - deallocate the "unpack_frame"
2206   //   - deallocate the deoptimization frame
2207   //   - in a loop using the information returned in the previous step
2208   //     push interpreter frames;
2209   //   - create a dummy "unpack_frame"
2210   //   - call the C routine: Deoptimization::unpack_frames (this function
2211   //     lays out values on the interpreter frame which was just created)
2212   //   - deallocate the dummy unpack_frame
2213   //   - return to the interpreter entry point
2214   //
2215   //  Refer to the following methods for more information:
2216   //   - Deoptimization::uncommon_trap
2217   //   - Deoptimization::unpack_frames
2218 
2219   // the unloaded class index is in R0 (first parameter to this blob)
2220 
2221   __ raw_push(FP, LR);
2222   __ set_last_Java_frame(SP, FP, false, Rtemp);
2223   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
2224   __ mov(R1, R0);
2225   __ mov(R0, Rthread);
2226   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
2227   __ mov(Rublock, R0);
2228   __ reset_last_Java_frame(Rtemp);
2229   __ raw_pop(FP, LR);
2230 
2231 #ifdef ASSERT
2232   { Label L;
2233     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2234     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2235     __ b(L, eq);
2236     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2237     __ bind(L);
2238   }
2239 #endif
2240 
2241 
2242   // Set initial stack state before pushing interpreter frames
2243   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2244   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2245   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2246 
2247 #ifdef AARCH64
2248   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2249   // They are needed for correct stack walking during stack overflow handling.
2250   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2251   __ sub(Rtemp, Rtemp, 2*wordSize);
2252   __ add(SP, SP, Rtemp, ex_uxtx);
2253   __ raw_pop(FP, LR);
2254 
2255 #ifdef ASSERT
2256   { Label L;
2257     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2258     __ cmp(FP, Rtemp);
2259     __ b(L, eq);
2260     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2261     __ bind(L);
2262   }
2263   { Label L;
2264     __ ldr(Rtemp, Address(R2));
2265     __ cmp(LR, Rtemp);
2266     __ b(L, eq);
2267     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2268     __ bind(L);
2269   }
2270 #endif // ASSERT
2271 
2272 #else
2273   __ add(SP, SP, Rtemp);
2274 #endif // AARCH64
2275 
2276   // Check that there is enough stack to push the deoptimized frames.
2277 #ifdef ASSERT
2278   // Compilers generate code that bangs the stack by as much as the
2279   // interpreter would need, so this stack banging should never
2280   // trigger a fault. Verify that it does not on non-product builds.
2281   if (UseStackBanging) {
2282 #ifndef AARCH64
2283     // The compiled method that we are deoptimizing was popped from the stack.
2284     // If the stack bang results in a stack overflow, we don't return to the
2285     // method that is being deoptimized. The stack overflow exception is
2286     // propagated to the caller of the deoptimized method. Need to get the pc
2287     // from the caller in LR and restore FP.
2288     __ ldr(LR, Address(R2, 0));
2289     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2290 #endif // !AARCH64
2291     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2292     __ arm_stack_overflow_check(R8, Rtemp);
2293   }
2294 #endif
2295   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2296   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2297   __ mov(Rsender, SP);
2298 #ifdef AARCH64
2299   __ sub(SP, SP, Rtemp, ex_uxtx);
2300 #else
2301   __ sub(SP, SP, Rtemp);
2302 #endif
2303 #ifndef AARCH64
2304   //  __ ldr(FP, Address(FP));
2305   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2306 #endif // !AARCH64
2307 
2308   // Push interpreter frames in a loop
2309   Label loop;
2310   __ bind(loop);
2311   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2312   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2313 
2314   __ raw_push(FP, LR);                                     // create new frame
2315   __ mov(FP, SP);
2316   __ sub(Rtemp, Rtemp, 2*wordSize);
2317 
2318 #ifdef AARCH64
2319   __ sub(SP, SP, Rtemp, ex_uxtx);
2320 #else
2321   __ sub(SP, SP, Rtemp);
2322 #endif // AARCH64
2323 
2324   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2325 #ifdef AARCH64
2326   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2327 #else
2328   __ mov(LR, 0);
2329   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2330 #endif // AARCH64
2331   __ subs(R8, R8, 1);                               // decrement counter
2332   __ mov(Rsender, SP);
2333   __ b(loop, ne);
2334 
2335   // Re-push self-frame
2336   __ ldr(LR, Address(R2));
2337   __ raw_push(FP, LR);
2338   __ mov(FP, SP);
2339 
2340   // Call unpack_frames with proper arguments
2341   __ mov(R0, Rthread);
2342   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2343   __ set_last_Java_frame(SP, FP, false, Rtemp);
2344   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2345   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2346   __ reset_last_Java_frame(Rtemp);
2347 
2348   __ mov(SP, FP);
2349 #ifdef AARCH64
2350   __ raw_pop(FP, LR);
2351   __ ret();
2352 #else
2353   __ pop(RegisterSet(FP) | RegisterSet(PC));
2354 #endif
2355 
2356   masm->flush();
2357   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2358 }
2359 
2360 #endif // COMPILER2
2361 
2362 //------------------------------generate_handler_blob------
2363 //
2364 // Generate a special Compile2Runtime blob that saves all registers,
2365 // sets up an oopmap, and calls safepoint code to stop the compiled code for
2366 // a safepoint.
2367 //
2368 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2369   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2370 
2371   ResourceMark rm;
2372   CodeBuffer buffer("handler_blob", 256, 256);
2373   int frame_size_words;
2374   OopMapSet* oop_maps;
2375 
2376   bool cause_return = (poll_type == POLL_AT_RETURN);
2377 
2378   MacroAssembler* masm = new MacroAssembler(&buffer);
2379   address start = __ pc();
2380   oop_maps = new OopMapSet();
2381 
2382   if (!cause_return) {
2383 #ifdef AARCH64
2384     __ raw_push(LR, LR);
2385 #else
2386     __ sub(SP, SP, 4); // make room for LR which may still be live
2387                        // here if we are coming from a c2 method
2388 #endif // AARCH64
2389   }
2390 
2391   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2392   if (!cause_return) {
2393     // Update the saved PC with the correct value.
2394     // Two steps are needed because LR can be live in a c2 method.
2395     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2396     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2397   }
2398 
2399   __ mov(R0, Rthread);
2400   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2401   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2402   __ call(call_ptr);
2403   if (pc_offset == -1) {
2404     pc_offset = __ offset();
2405   }
2406   oop_maps->add_gc_map(pc_offset, map);
2407   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2408 
2409   // Check for pending exception
2410   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2411   __ cmp(Rtemp, 0);
2412 
2413 #ifdef AARCH64
2414   RegisterSaver::restore_live_registers(masm, cause_return);
2415   Register ret_addr = cause_return ? LR : Rtemp;
2416   if (!cause_return) {
2417     __ raw_pop(FP, ret_addr);
2418   }
2419 
2420   Label throw_exception;
2421   __ b(throw_exception, ne);
2422   __ br(ret_addr);
2423 
2424   __ bind(throw_exception);
2425   __ mov(Rexception_pc, ret_addr);
2426 #else // AARCH64
2427   if (!cause_return) {
2428     RegisterSaver::restore_live_registers(masm, false);
2429     __ pop(PC, eq);
2430     __ pop(Rexception_pc);
2431   } else {
2432     RegisterSaver::restore_live_registers(masm);
2433     __ bx(LR, eq);
2434     __ mov(Rexception_pc, LR);
2435   }
2436 #endif // AARCH64
2437 
2438   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2439 
2440   __ flush();
2441 
2442   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2443 }
2444 
2445 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2446   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2447 
2448   ResourceMark rm;
2449   CodeBuffer buffer(name, 1000, 512);
2450   int frame_size_words;
2451   OopMapSet *oop_maps;
2452   int frame_complete;
2453 
2454   MacroAssembler* masm = new MacroAssembler(&buffer);
2455   Label pending_exception;
2456 
2457   int start = __ offset();
2458 
2459   oop_maps = new OopMapSet();
2460   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
2461 
2462   frame_complete = __ offset();
2463 
2464   __ mov(R0, Rthread);
2465 
2466   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2467   assert(start == 0, "warning: start differs from code_begin");
2468   __ call(destination);
2469   if (pc_offset == -1) {
2470     pc_offset = __ offset();
2471   }
2472   oop_maps->add_gc_map(pc_offset, map);
2473   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2474 
2475   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
2476   __ cbnz(R1, pending_exception);
2477 
2478   // Overwrite saved register values
2479 
2480   // Place metadata result of VM call into Rmethod
2481   __ get_vm_result_2(R1, Rtemp);
2482   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
2483 
2484   // Place target address (VM call result) into Rtemp
2485   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
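   // restore_live_registers below reloads Rmethod and Rtemp from these overwritten
   // slots, so execution resumes at the resolved target with the resolved Method*.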
2486 
2487   RegisterSaver::restore_live_registers(masm);
2488   __ jump(Rtemp);
2489 
2490   __ bind(pending_exception);
2491 
2492   RegisterSaver::restore_live_registers(masm);
2493   const Register Rzero = __ zero_register(Rtemp);
2494   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
2495   __ mov(Rexception_pc, LR);
2496   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2497 
2498   __ flush();
2499 
2500   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2501 }