1 /*
   2  * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "runtime/vframeArray.hpp"
  37 #include "utilities/align.hpp"
  38 #include "vmreg_arm.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #endif
  45 #ifdef SHARK
  46 #include "compiler/compileBroker.hpp"
  47 #include "shark/sharkCompiler.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 class RegisterSaver {
  53 public:
  54 
  55   // Special registers:
  56   //              32-bit ARM     64-bit ARM
  57   //  Rthread:       R10            R28
  58   //  LR:            R14            R30
  59 
  60   // Rthread is callee saved in the C ABI and never changed by compiled code:
  61   // no need to save it.
  62 
  63 // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  64   // The one at LR_offset is a return address that is needed by stack walking.
  65   // A c2 method uses LR as a standard register so it may be live when we
  66   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  67   // in case it's live in the method we are coming from.
  68 
  69 #ifdef AARCH64
  70 
  71   //
  72   // On AArch64 the register save area has the following layout:
  73   //
  74   // |---------------------|
  75   // | return address (LR) |
  76   // | FP                  |
  77   // |---------------------|
  78   // | V31                 |
  79   // | ...                 |
  80   // | V0                  |
  81   // |---------------------|
  82   // | padding             |
  83   // | R30 (LR live value) |
  84   // |---------------------|
  85   // | R27                 |
  86   // | ...                 |
  87   // | R0                  |
  88   // |---------------------| <-- SP
  89   //
  90 
  91   enum RegisterLayout {
  92     number_of_saved_gprs = 28,
  93     number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
  94     words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
  95 
  96     R0_offset  = 0,
  97     R30_offset = R0_offset + number_of_saved_gprs,
  98     D0_offset  = R30_offset + 2,
  99     FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
 100     LR_offset  = FP_offset + 1,
 101 
 102     reg_save_size = LR_offset + 1,
 103   };
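
  // Illustration only (not used by the code): with number_of_saved_gprs == 28
  // and assuming FloatRegisterImpl::number_of_registers == 32 and
  // words_per_fpr == 2 (each V register saved as a 16-byte quad), the offsets
  // evaluate to R0_offset = 0, R30_offset = 28, D0_offset = 30,
  // FP_offset = 30 + 32 * 2 = 94, LR_offset = 95 and reg_save_size = 96 words,
  // matching the layout diagram above.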
 104 
 105   static const int Rmethod_offset;
 106   static const int Rtemp_offset;
 107 
 108 #else
 109 
 110   enum RegisterLayout {
 111     fpu_save_size = FloatRegisterImpl::number_of_registers,
 112 #ifndef __SOFTFP__
 113     D0_offset = 0,
 114 #endif
 115     R0_offset = fpu_save_size,
 116     R1_offset,
 117     R2_offset,
 118     R3_offset,
 119     R4_offset,
 120     R5_offset,
 121     R6_offset,
 122 #if (FP_REG_NUM != 7)
 123     // if not saved as FP
 124     R7_offset,
 125 #endif
 126     R8_offset,
 127     R9_offset,
 128 #if (FP_REG_NUM != 11)
 129     // if not saved as FP
 130     R11_offset,
 131 #endif
 132     R12_offset,
 133     R14_offset,
 134     FP_offset,
 135     LR_offset,
 136     reg_save_size,
 137 
 138     Rmethod_offset = R9_offset,
 139     Rtemp_offset = R12_offset,
 140   };
 141 
 142   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
 143   // (altFP_7_11 is the one among R7 and R11 which is not FP)
 144 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 145 
 146 #endif // AARCH64
 147 
 148   //  When LR may be live in the nmethod from which we are coming,
 149   //  lr_saved is true: the caller has already saved the return address
 150   //  on the stack before the call to save_live_registers, and LR still
 151   //  contains the live value.
 152 
 153   static OopMap* save_live_registers(MacroAssembler* masm,
 154                                      int* total_frame_words,
 155                                      bool lr_saved = false);
 156   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 157 
 158 };
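
// Illustrative use only (a sketch, not any particular stub in this file;
// 'oop_maps' and 'start' are hypothetical locals):
//
//   int frame_words;
//   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_words);
//   ... emit a call into the VM runtime ...
//   oop_maps->add_gc_map(__ pc() - start, map);
//   RegisterSaver::restore_live_registers(masm);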
 159 
 160 
 161 #ifdef AARCH64
 162 const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
 163 const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
 164 #endif // AARCH64
 165 
 166 
 167 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 168                                            int* total_frame_words,
 169                                            bool lr_saved) {
 170   *total_frame_words = reg_save_size;
 171 
 172   OopMapSet *oop_maps = new OopMapSet();
 173   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 174 
 175 #ifdef AARCH64
 176   assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
 177 
 178   if (lr_saved) {
 179     // LR was stashed here so that the jump could use it as a scratch reg
 180     __ ldr(LR, Address(SP, 0));
 181     // There are two words on the stack top:
 182     //  [SP + 0]: placeholder for FP
 183     //  [SP + wordSize]: saved return address
 184     __ str(FP, Address(SP, 0));
 185   } else {
 186     __ raw_push(FP, LR);
 187   }
 188 
 189   __ sub(SP, SP, (reg_save_size - 2) * wordSize);
 190 
 191   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 192     int offset = R0_offset + i;
 193     __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
 194     map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
 195     map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
 196   }
 197 
 198   __ str(R30, Address(SP, R30_offset * wordSize));
 199   map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
 200 
 201   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 202     int offset1 = D0_offset + i * words_per_fpr;
 203     int offset2 = offset1 + words_per_fpr;
 204     Address base(SP, offset1 * wordSize);
 205     if (words_per_fpr == 2) {
 206       // pair of "wide" quad vector registers
 207       __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 208     } else {
 209       // pair of double vector registers
 210       __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 211     }
 212     map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
 213     map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
 214   }
 215 #else
 216   if (lr_saved) {
 217     __ push(RegisterSet(FP));
 218   } else {
 219     __ push(RegisterSet(FP) | RegisterSet(LR));
 220   }
 221   __ push(SAVED_BASE_REGS);
 222   if (HaveVFP) {
 223     if (VM_Version::has_vfp3_32()) {
 224       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 225     } else {
 226       if (FloatRegisterImpl::number_of_registers > 32) {
 227         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 228         __ sub(SP, SP, 32 * wordSize);
 229       }
 230     }
 231     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 232   } else {
 233     __ sub(SP, SP, fpu_save_size * wordSize);
 234   }
 235 
 236   int i;
 237   int j=0;
 238   for (i = R0_offset; i <= R9_offset; i++) {
 239     if (j == FP_REG_NUM) {
 240       // skip the FP register, managed below.
 241       j++;
 242     }
 243     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 244     j++;
 245   }
 246   assert(j == R10->encoding(), "must be");
 247 #if (FP_REG_NUM != 11)
 248   // add R11, if not managed as FP
 249   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 250 #endif
 251   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 252   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 253   if (HaveVFP) {
 254     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 255       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 256       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 257     }
 258   }
 259 #endif // AARCH64
 260 
 261   return map;
 262 }
 263 
 264 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 265 #ifdef AARCH64
 266   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 267     __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
 268   }
 269 
 270   __ ldr(R30, Address(SP, R30_offset * wordSize));
 271 
 272   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 273     Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
 274     if (words_per_fpr == 2) {
 275       // pair of "wide" quad vector registers
 276       __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 277     } else {
 278       // pair of double vector registers
 279       __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 280     }
 281   }
 282 
 283   __ add(SP, SP, (reg_save_size - 2) * wordSize);
 284 
 285   if (restore_lr) {
 286     __ raw_pop(FP, LR);
 287   } else {
 288     __ ldr(FP, Address(SP, 0));
 289   }
 290 #else
 291   if (HaveVFP) {
 292     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 293     if (VM_Version::has_vfp3_32()) {
 294       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 295     } else {
 296       if (FloatRegisterImpl::number_of_registers > 32) {
 297         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 298         __ add(SP, SP, 32 * wordSize);
 299       }
 300     }
 301   } else {
 302     __ add(SP, SP, fpu_save_size * wordSize);
 303   }
 304   __ pop(SAVED_BASE_REGS);
 305   if (restore_lr) {
 306     __ pop(RegisterSet(FP) | RegisterSet(LR));
 307   } else {
 308     __ pop(RegisterSet(FP));
 309   }
 310 #endif // AARCH64
 311 }
 312 
 313 #ifdef AARCH64
 314 
 315 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 316   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 317     __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
 318   } else {
 319     __ raw_push(R0, ZR);
 320   }
 321 }
 322 
 323 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 324   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 325     __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
 326   } else {
 327     __ raw_pop(R0, ZR);
 328   }
 329 }
 330 
 331 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 332   __ raw_push(R0, R1);
 333   __ raw_push(R2, R3);
 334   __ raw_push(R4, R5);
 335   __ raw_push(R6, R7);
 336 
 337   assert(FPR_PARAMS == 8, "adjust this code");
 338   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 339 
 340   if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
 341   if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
 342   if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
 343   if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
 344 }
 345 
 346 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 347   assert(FPR_PARAMS == 8, "adjust this code");
 348   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 349 
 350   if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
 351   if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
 352   if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
 353   if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
 354 
 355   __ raw_pop(R6, R7);
 356   __ raw_pop(R4, R5);
 357   __ raw_pop(R2, R3);
 358   __ raw_pop(R0, R1);
 359 }
 360 
 361 #else // AARCH64
 362 
 363 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 364 #ifdef __ABI_HARD__
 365   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 366     __ sub(SP, SP, 8);
 367     __ fstd(D0, Address(SP));
 368     return;
 369   }
 370 #endif // __ABI_HARD__
 371   __ raw_push(R0, R1);
 372 }
 373 
 374 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 375 #ifdef __ABI_HARD__
 376   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 377     __ fldd(D0, Address(SP));
 378     __ add(SP, SP, 8);
 379     return;
 380   }
 381 #endif // __ABI_HARD__
 382   __ raw_pop(R0, R1);
 383 }
 384 
 385 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 386   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 387   __ push(RegisterSet(R0, R3));
 388 
 389 #ifdef __ABI_HARD__
 390   // preserve arguments
 391   // Likely not needed as the locking code probably won't modify volatile FP registers,
 392   // but there is no way to guarantee that
 393   if (fp_regs_in_arguments) {
 394     // convert fp_regs_in_arguments to a number of double registers
 395     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 396     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 397   }
 398 #endif // __ABI_HARD__
 399 }
 400 
 401 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 402 #ifdef __ABI_HARD__
 403   if (fp_regs_in_arguments) {
 404     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 405     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 406   }
 407 #endif // __ABI_HARD__
 408 
 409   __ pop(RegisterSet(R0, R3));
 410 }
 411 
 412 #endif // AARCH64
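
// Illustration only: the push_/pop_param_registers pairs above are meant to
// bracket a runtime call made while the Java argument registers are still
// live, e.g. (sketch with a hypothetical runtime entry):
//
//   push_param_registers(masm, fp_regs_in_arguments);
//   __ call(CAST_FROM_FN_PTR(address, some_runtime_entry));
//   pop_param_registers(masm, fp_regs_in_arguments);
//
// push_/pop_result_registers play the same role for the return value
// (R0/R1 or D0) around calls made after the native method has returned.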
 413 
 414 
 415 // Is the vector's size (in bytes) bigger than the size saved by default?
 416 // All vector registers are saved by default on ARM.
 417 bool SharedRuntime::is_wide_vector(int size) {
 418   return false;
 419 }
 420 
 421 size_t SharedRuntime::trampoline_size() {
 422   return 16;
 423 }
 424 
 425 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 426   InlinedAddress dest(destination);
 427   __ indirect_jump(dest, Rtemp);
 428   __ bind_literal(dest);
 429 }
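
// Note: the trampoline above is just an indirect jump through an inlined
// literal holding 'destination' (indirect_jump + bind_literal), so the 16
// bytes returned by trampoline_size() must cover both the jump sequence and
// the bound literal.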
 430 
 431 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 432                                         VMRegPair *regs,
 433                                         VMRegPair *regs2,
 434                                         int total_args_passed) {
 435   assert(regs2 == NULL, "not needed on arm");
 436 #ifdef AARCH64
 437   int slot = 0; // counted in 32-bit VMReg slots
 438   int reg = 0;
 439   int fp_reg = 0;
 440   for (int i = 0; i < total_args_passed; i++) {
 441     switch (sig_bt[i]) {
 442     case T_SHORT:
 443     case T_CHAR:
 444     case T_BYTE:
 445     case T_BOOLEAN:
 446     case T_INT:
 447       if (reg < GPR_PARAMS) {
 448         Register r = as_Register(reg);
 449         regs[i].set1(r->as_VMReg());
 450         reg++;
 451       } else {
 452         regs[i].set1(VMRegImpl::stack2reg(slot));
 453         slot+=2;
 454       }
 455       break;
 456     case T_LONG:
 457       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 458       // fall through
 459     case T_ARRAY:
 460     case T_OBJECT:
 461     case T_ADDRESS:
 462       if (reg < GPR_PARAMS) {
 463         Register r = as_Register(reg);
 464         regs[i].set2(r->as_VMReg());
 465         reg++;
 466       } else {
 467         regs[i].set2(VMRegImpl::stack2reg(slot));
 468         slot+=2;
 469       }
 470       break;
 471     case T_FLOAT:
 472       if (fp_reg < FPR_PARAMS) {
 473         FloatRegister r = as_FloatRegister(fp_reg);
 474         regs[i].set1(r->as_VMReg());
 475         fp_reg++;
 476       } else {
 477         regs[i].set1(VMRegImpl::stack2reg(slot));
 478         slot+=2;
 479       }
 480       break;
 481     case T_DOUBLE:
 482       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 483       if (fp_reg < FPR_PARAMS) {
 484         FloatRegister r = as_FloatRegister(fp_reg);
 485         regs[i].set2(r->as_VMReg());
 486         fp_reg++;
 487       } else {
 488         regs[i].set2(VMRegImpl::stack2reg(slot));
 489         slot+=2;
 490       }
 491       break;
 492     case T_VOID:
 493       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 494       regs[i].set_bad();
 495       break;
 496     default:
 497       ShouldNotReachHere();
 498     }
 499   }
 500   return slot;
 501 
 502 #else // AARCH64
 503 
 504   int slot = 0;
 505   int ireg = 0;
 506 #ifdef __ABI_HARD__
 507   int fp_slot = 0;
 508   int single_fpr_slot = 0;
 509 #endif // __ABI_HARD__
 510   for (int i = 0; i < total_args_passed; i++) {
 511     switch (sig_bt[i]) {
 512     case T_SHORT:
 513     case T_CHAR:
 514     case T_BYTE:
 515     case T_BOOLEAN:
 516     case T_INT:
 517     case T_ARRAY:
 518     case T_OBJECT:
 519     case T_ADDRESS:
 520 #ifndef __ABI_HARD__
 521     case T_FLOAT:
 522 #endif // !__ABI_HARD__
 523       if (ireg < 4) {
 524         Register r = as_Register(ireg);
 525         regs[i].set1(r->as_VMReg());
 526         ireg++;
 527       } else {
 528         regs[i].set1(VMRegImpl::stack2reg(slot));
 529         slot++;
 530       }
 531       break;
 532     case T_LONG:
 533 #ifndef __ABI_HARD__
 534     case T_DOUBLE:
 535 #endif // !__ABI_HARD__
 536       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 537       if (ireg <= 2) {
 538 #if (ALIGN_WIDE_ARGUMENTS == 1)
 539         if(ireg & 1) ireg++;  // Aligned location required
 540 #endif
 541         Register r1 = as_Register(ireg);
 542         Register r2 = as_Register(ireg + 1);
 543         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 544         ireg += 2;
 545 #if (ALIGN_WIDE_ARGUMENTS == 0)
 546       } else if (ireg == 3) {
 547         // uses R3 + one stack slot
 548         Register r = as_Register(ireg);
 549         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 550         ireg += 1;
 551         slot += 1;
 552 #endif
 553       } else {
 554         if (slot & 1) slot++; // Aligned location required
 555         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 556         slot += 2;
 557         ireg = 4;
 558       }
 559       break;
 560     case T_VOID:
 561       regs[i].set_bad();
 562       break;
 563 #ifdef __ABI_HARD__
 564     case T_FLOAT:
 565       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 566         if ((single_fpr_slot & 1) == 0) {
 567           single_fpr_slot = fp_slot;
 568           fp_slot += 2;
 569         }
 570         FloatRegister r = as_FloatRegister(single_fpr_slot);
 571         single_fpr_slot++;
 572         regs[i].set1(r->as_VMReg());
 573       } else {
 574         regs[i].set1(VMRegImpl::stack2reg(slot));
 575         slot++;
 576       }
 577       break;
 578     case T_DOUBLE:
 579       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 580       if (fp_slot <= 14) {
 581         FloatRegister r1 = as_FloatRegister(fp_slot);
 582         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 583         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 584         fp_slot += 2;
 585       } else {
 586         if(slot & 1) slot++;
 587         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 588         slot += 2;
 589         single_fpr_slot = 16;
 590       }
 591       break;
 592 #endif // __ABI_HARD__
 593     default:
 594       ShouldNotReachHere();
 595     }
 596   }
 597   return slot;
 598 #endif // AARCH64
 599 }
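
// Worked example for the 32-bit __ABI_HARD__ path above (illustration only,
// assuming ALIGN_WIDE_ARGUMENTS == 1), for the signature
// (T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID):
//   T_INT    -> R0
//   T_LONG   -> R2:R3   (ireg bumped from 1 to 2 for alignment)
//   T_FLOAT  -> S0      (fp_slot advances to 2; S1 stays available for a later
//                        single via the back-filling single_fpr_slot)
//   T_DOUBLE -> S2:S3 (D1)
// and the function returns 0 because nothing spilled to the stack.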
 600 
 601 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 602                                            VMRegPair *regs,
 603                                            int total_args_passed,
 604                                            int is_outgoing) {
 605 #ifdef AARCH64
 606   // C calling convention on AArch64 is good enough.
 607   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 608 #else
 609 #ifdef __SOFTFP__
 610   // soft float is the same as the C calling convention.
 611   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 612 #endif // __SOFTFP__
 613   (void) is_outgoing;
 614   int slot = 0;
 615   int ireg = 0;
 616   int freg = 0;
 617   int single_fpr = 0;
 618 
 619   for (int i = 0; i < total_args_passed; i++) {
 620     switch (sig_bt[i]) {
 621     case T_SHORT:
 622     case T_CHAR:
 623     case T_BYTE:
 624     case T_BOOLEAN:
 625     case T_INT:
 626     case T_ARRAY:
 627     case T_OBJECT:
 628     case T_ADDRESS:
 629       if (ireg < 4) {
 630         Register r = as_Register(ireg++);
 631         regs[i].set1(r->as_VMReg());
 632       } else {
 633         regs[i].set1(VMRegImpl::stack2reg(slot++));
 634       }
 635       break;
 636     case T_FLOAT:
 637       // C2 utilizes S14/S15 for mem-mem moves
 638       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 639         if ((single_fpr & 1) == 0) {
 640           single_fpr = freg;
 641           freg += 2;
 642         }
 643         FloatRegister r = as_FloatRegister(single_fpr++);
 644         regs[i].set1(r->as_VMReg());
 645       } else {
 646         regs[i].set1(VMRegImpl::stack2reg(slot++));
 647       }
 648       break;
 649     case T_DOUBLE:
 650       // C2 utilizes S14/S15 for mem-mem moves
 651       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 652         FloatRegister r1 = as_FloatRegister(freg);
 653         FloatRegister r2 = as_FloatRegister(freg + 1);
 654         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 655         freg += 2;
 656       } else {
 657         // Keep internally the aligned calling convention,
 658         // ignoring ALIGN_WIDE_ARGUMENTS
 659         if (slot & 1) slot++;
 660         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 661         slot += 2;
 662         single_fpr = 16;
 663       }
 664       break;
 665     case T_LONG:
 666       // Keep internally the aligned calling convention,
 667       // ignoring ALIGN_WIDE_ARGUMENTS
 668       if (ireg <= 2) {
 669         if (ireg & 1) ireg++;
 670         Register r1 = as_Register(ireg);
 671         Register r2 = as_Register(ireg + 1);
 672         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 673         ireg += 2;
 674       } else {
 675         if (slot & 1) slot++;
 676         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 677         slot += 2;
 678         ireg = 4;
 679       }
 680       break;
 681     case T_VOID:
 682       regs[i].set_bad();
 683       break;
 684     default:
 685       ShouldNotReachHere();
 686     }
 687   }
 688 
 689   if (slot & 1) slot++;
 690   return slot;
 691 #endif // AARCH64
 692 }
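
// Worked example for the 32-bit VFP path above (illustration only), same
// signature as the sketch after c_calling_convention:
// (T_INT, T_LONG, T_VOID, T_FLOAT, T_DOUBLE, T_VOID):
//   T_INT    -> R0
//   T_LONG   -> R2:R3   (longs and doubles are always aligned here,
//                        regardless of ALIGN_WIDE_ARGUMENTS)
//   T_FLOAT  -> S0
//   T_DOUBLE -> S2:S3 (D1)
// The main differences from the C convention are the unconditional alignment
// of wide values and, with COMPILER2, the exclusion of S14/S15 which C2
// reserves for mem-mem moves.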
 693 
 694 static void patch_callers_callsite(MacroAssembler *masm) {
 695   Label skip;
 696 
 697   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 698   __ cbz(Rtemp, skip);
 699 
 700 #ifdef AARCH64
 701   push_param_registers(masm, FPR_PARAMS);
 702   __ raw_push(LR, ZR);
 703 #else
 704   // Pushing an even number of registers for stack alignment.
 705   // Selecting R9, which had to be saved anyway for some platforms.
 706   __ push(RegisterSet(R0, R3) | R9 | LR);
 707 #endif // AARCH64
 708 
 709   __ mov(R0, Rmethod);
 710   __ mov(R1, LR);
 711   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 712 
 713 #ifdef AARCH64
 714   __ raw_pop(LR, ZR);
 715   pop_param_registers(masm, FPR_PARAMS);
 716 #else
 717   __ pop(RegisterSet(R0, R3) | R9 | LR);
 718 #endif // AARCH64
 719 
 720   __ bind(skip);
 721 }
 722 
 723 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 724                                     int total_args_passed, int comp_args_on_stack,
 725                                     const BasicType *sig_bt, const VMRegPair *regs) {
 726   // TODO: ARM - Maybe we can use ldm to load the arguments
 727   const Register tmp = Rtemp; // avoid erasing R5_mh
 728 
 729   // The next assert may not be needed but is safer. Extra analysis is required
 730   // if there are not enough free registers and we need to use R5 here.
 731   assert_different_registers(tmp, R5_mh);
 732 
 733   // 6243940 We might end up in handle_wrong_method if
 734   // the callee is deoptimized as we race through here. If that
 735   // happens we don't want to take a safepoint because the
 736   // caller frame will look interpreted and arguments are now
 737   // "compiled" so it is much better to make this transition
 738   // invisible to the stack walking code. Unfortunately if
 739   // we try and find the callee by normal means a safepoint
 740   // is possible. So we stash the desired callee in the thread
 741   // and the VM will find it there should this case occur.
 742   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 743   __ str(Rmethod, callee_target_addr);
 744 
 745 #ifdef AARCH64
 746 
 747   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
 748   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
 749 
 750   if (comp_args_on_stack) {
 751     __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
 752   }
 753 
 754   for (int i = 0; i < total_args_passed; i++) {
 755     if (sig_bt[i] == T_VOID) {
 756       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 757       continue;
 758     }
 759     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 760 
 761     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 762     Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 763 
 764     VMReg r = regs[i].first();
 765     bool full_word = regs[i].second()->is_valid();
 766 
 767     if (r->is_stack()) {
 768       if (full_word) {
 769         __ ldr(tmp, source_addr);
 770         __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 771       } else {
 772         __ ldr_w(tmp, source_addr);
 773         __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 774       }
 775     } else if (r->is_Register()) {
 776       if (full_word) {
 777         __ ldr(r->as_Register(), source_addr);
 778       } else {
 779         __ ldr_w(r->as_Register(), source_addr);
 780       }
 781     } else if (r->is_FloatRegister()) {
 782       if (sig_bt[i] == T_DOUBLE) {
 783         __ ldr_d(r->as_FloatRegister(), source_addr);
 784       } else {
 785         __ ldr_s(r->as_FloatRegister(), source_addr);
 786       }
 787     } else {
 788       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 789     }
 790   }
 791 
 792   __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
 793   __ br(tmp);
 794 
 795 #else
 796 
 797   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 798 
 799   const Register initial_sp = Rmethod; // temporarily scratched
 800 
 801   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 802   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 803 
 804   __ mov(initial_sp, SP);
 805 
 806   if (comp_args_on_stack) {
 807     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 808   }
 809   __ bic(SP, SP, StackAlignmentInBytes - 1);
 810 
 811   for (int i = 0; i < total_args_passed; i++) {
 812     if (sig_bt[i] == T_VOID) {
 813       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 814       continue;
 815     }
 816     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 817     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 818 
 819     VMReg r_1 = regs[i].first();
 820     VMReg r_2 = regs[i].second();
 821     if (r_1->is_stack()) {
 822       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 823       if (!r_2->is_valid()) {
 824         __ ldr(tmp, Address(initial_sp, arg_offset));
 825         __ str(tmp, Address(SP, stack_offset));
 826       } else {
 827         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 828         __ str(tmp, Address(SP, stack_offset));
 829         __ ldr(tmp, Address(initial_sp, arg_offset));
 830         __ str(tmp, Address(SP, stack_offset + wordSize));
 831       }
 832     } else if (r_1->is_Register()) {
 833       if (!r_2->is_valid()) {
 834         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 835       } else {
 836         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 837         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 838       }
 839     } else if (r_1->is_FloatRegister()) {
 840 #ifdef __SOFTFP__
 841       ShouldNotReachHere();
 842 #endif // __SOFTFP__
 843       if (!r_2->is_valid()) {
 844         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 845       } else {
 846         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 847       }
 848     } else {
 849       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 850     }
 851   }
 852 
 853   // restore Rmethod (scratched for initial_sp)
 854   __ ldr(Rmethod, callee_target_addr);
 855   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 856 
 857 #endif // AARCH64
 858 }
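
// Illustration of the 32-bit path above (assuming expr_offset_in_bytes(n) is
// n * Interpreter::stackElementSize): for a (long, int) Java signature
// (total_args_passed == 3, the long carries a trailing T_VOID half), the long
// is passed in R0:R1, with R0 loaded from the interpreter argument area at
// expr_offset_in_bytes(1) and R1 from expr_offset_in_bytes(2), while the int
// goes to R2 from expr_offset_in_bytes(0).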
 859 
 860 static void gen_c2i_adapter(MacroAssembler *masm,
 861                             int total_args_passed,  int comp_args_on_stack,
 862                             const BasicType *sig_bt, const VMRegPair *regs,
 863                             Label& skip_fixup) {
 864   // TODO: ARM - Maybe we can use stm to deoptimize the arguments
 865   const Register tmp = Rtemp;
 866 
 867   patch_callers_callsite(masm);
 868   __ bind(skip_fixup);
 869 
 870   __ mov(Rsender_sp, SP); // not yet saved
 871 
 872 #ifdef AARCH64
 873 
 874   int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
 875   if (extraspace) {
 876     __ sub(SP, SP, extraspace);
 877   }
 878 
 879   for (int i = 0; i < total_args_passed; i++) {
 880     if (sig_bt[i] == T_VOID) {
 881       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 882       continue;
 883     }
 884 
 885     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 886     Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 887 
 888     VMReg r = regs[i].first();
 889     bool full_word = regs[i].second()->is_valid();
 890 
 891     if (r->is_stack()) {
 892       if (full_word) {
 893         __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 894         __ str(tmp, dest_addr);
 895       } else {
 896         __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 897         __ str_w(tmp, dest_addr);
 898       }
 899     } else if (r->is_Register()) {
 900       if (full_word) {
 901         __ str(r->as_Register(), dest_addr);
 902       } else {
 903         __ str_w(r->as_Register(), dest_addr);
 904       }
 905     } else if (r->is_FloatRegister()) {
 906       if (sig_bt[i] == T_DOUBLE) {
 907         __ str_d(r->as_FloatRegister(), dest_addr);
 908       } else {
 909         __ str_s(r->as_FloatRegister(), dest_addr);
 910       }
 911     } else {
 912       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 913     }
 914   }
 915 
 916   __ mov(Rparams, SP);
 917 
 918   __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
 919   __ br(tmp);
 920 
 921 #else
 922 
 923   int extraspace = total_args_passed * Interpreter::stackElementSize;
 924   if (extraspace) {
 925     __ sub_slow(SP, SP, extraspace);
 926   }
 927 
 928   for (int i = 0; i < total_args_passed; i++) {
 929     if (sig_bt[i] == T_VOID) {
 930       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 931       continue;
 932     }
 933     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 934 
 935     VMReg r_1 = regs[i].first();
 936     VMReg r_2 = regs[i].second();
 937     if (r_1->is_stack()) {
 938       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 939       if (!r_2->is_valid()) {
 940         __ ldr(tmp, Address(SP, arg_offset));
 941         __ str(tmp, Address(SP, stack_offset));
 942       } else {
 943         __ ldr(tmp, Address(SP, arg_offset));
 944         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 945         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 946         __ str(tmp, Address(SP, stack_offset));
 947       }
 948     } else if (r_1->is_Register()) {
 949       if (!r_2->is_valid()) {
 950         __ str(r_1->as_Register(), Address(SP, stack_offset));
 951       } else {
 952         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 953         __ str(r_2->as_Register(), Address(SP, stack_offset));
 954       }
 955     } else if (r_1->is_FloatRegister()) {
 956 #ifdef __SOFTFP__
 957       ShouldNotReachHere();
 958 #endif // __SOFTFP__
 959       if (!r_2->is_valid()) {
 960         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 961       } else {
 962         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 963       }
 964     } else {
 965       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 966     }
 967   }
 968 
 969   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 970 
 971 #endif // AARCH64
 972 }
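
// Illustration of the 32-bit layout above: for the same (long, int) signature,
// extraspace == 3 * Interpreter::stackElementSize; the int (i == 2) is stored
// at offset 0 from the new SP and the two halves of the long at offsets
// stackElementSize and 2 * stackElementSize, with the last Java argument
// closest to SP.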
 973 
 974 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 975                                                             int total_args_passed,
 976                                                             int comp_args_on_stack,
 977                                                             const BasicType *sig_bt,
 978                                                             const VMRegPair *regs,
 979                                                             AdapterFingerPrint* fingerprint) {
 980   address i2c_entry = __ pc();
 981   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 982 
 983   address c2i_unverified_entry = __ pc();
 984   Label skip_fixup;
 985   const Register receiver       = R0;
 986   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 987   const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
 988 
 989   __ load_klass(receiver_klass, receiver);
 990   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 991   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset()));
 992   __ cmp(receiver_klass, holder_klass);
 993 
 994 #ifdef AARCH64
 995   Label ic_miss;
 996   __ b(ic_miss, ne);
 997   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 998   __ cbz(Rtemp, skip_fixup);
 999   __ bind(ic_miss);
1000   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1001 #else
1002   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
1003   __ cmp(Rtemp, 0, eq);
1004   __ b(skip_fixup, eq);
1005   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
1006 #endif // AARCH64
1007 
1008   address c2i_entry = __ pc();
1009   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1010 
1011   __ flush();
1012   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1013 }
1014 
1015 
1016 static int reg2offset_in(VMReg r) {
1017   // Account for saved FP and LR
1018   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1019 }
1020 
1021 static int reg2offset_out(VMReg r) {
1022   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1023 }
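
// Illustration: an incoming argument at stack slot 3 is read from the caller
// frame at FP + 3 * VMRegImpl::stack_slot_size + 2 * wordSize (skipping the FP
// and LR saved by this wrapper), while an outgoing slot 3 is written at
// SP + (3 + out_preserve_stack_slots()) * VMRegImpl::stack_slot_size.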
1024 
1025 
1026 static void verify_oop_args(MacroAssembler* masm,
1027                             const methodHandle& method,
1028                             const BasicType* sig_bt,
1029                             const VMRegPair* regs) {
1030   Register temp_reg = Rmethod;  // not part of any compiled calling seq
1031   if (VerifyOops) {
1032     for (int i = 0; i < method->size_of_parameters(); i++) {
1033       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
1034         VMReg r = regs[i].first();
1035         assert(r->is_valid(), "bad oop arg");
1036         if (r->is_stack()) {
1037           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1038           __ verify_oop(temp_reg);
1039         } else {
1040           __ verify_oop(r->as_Register());
1041         }
1042       }
1043     }
1044   }
1045 }
1046 
1047 static void gen_special_dispatch(MacroAssembler* masm,
1048                                  const methodHandle& method,
1049                                  const BasicType* sig_bt,
1050                                  const VMRegPair* regs) {
1051   verify_oop_args(masm, method, sig_bt, regs);
1052   vmIntrinsics::ID iid = method->intrinsic_id();
1053 
1054   // Now write the args into the outgoing interpreter space
1055   bool     has_receiver   = false;
1056   Register receiver_reg   = noreg;
1057   int      member_arg_pos = -1;
1058   Register member_reg     = noreg;
1059   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1060   if (ref_kind != 0) {
1061     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1062     member_reg = Rmethod;  // known to be free at this point
1063     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1064   } else if (iid == vmIntrinsics::_invokeBasic) {
1065     has_receiver = true;
1066   } else {
1067     fatal("unexpected intrinsic id %d", iid);
1068   }
1069 
1070   if (member_reg != noreg) {
1071     // Load the member_arg into register, if necessary.
1072     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1073     VMReg r = regs[member_arg_pos].first();
1074     if (r->is_stack()) {
1075       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1076     } else {
1077       // no data motion is needed
1078       member_reg = r->as_Register();
1079     }
1080   }
1081 
1082   if (has_receiver) {
1083     // Make sure the receiver is loaded into a register.
1084     assert(method->size_of_parameters() > 0, "oob");
1085     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1086     VMReg r = regs[0].first();
1087     assert(r->is_valid(), "bad receiver arg");
1088     if (r->is_stack()) {
1089       // Porting note:  This assumes that compiled calling conventions always
1090       // pass the receiver oop in a register.  If this is not true on some
1091       // platform, pick a temp and load the receiver from stack.
1092       assert(false, "receiver always in a register");
1093       receiver_reg = j_rarg0;  // known to be free at this point
1094       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1095     } else {
1096       // no data motion is needed
1097       receiver_reg = r->as_Register();
1098     }
1099   }
1100 
1101   // Figure out which address we are really jumping to:
1102   MethodHandles::generate_method_handle_dispatch(masm, iid,
1103                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1104 }
1105 
1106 // ---------------------------------------------------------------------------
1107 // Generate a native wrapper for a given method.  The method takes arguments
1108 // in the Java compiled code convention, marshals them to the native
1109 // convention (handlizes oops, etc), transitions to native, makes the call,
1110 // returns to java state (possibly blocking), unhandlizes any result and
1111 // returns.
1112 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1113                                                 const methodHandle& method,
1114                                                 int compile_id,
1115                                                 BasicType* in_sig_bt,
1116                                                 VMRegPair* in_regs,
1117                                                 BasicType ret_type) {
1118   if (method->is_method_handle_intrinsic()) {
1119     vmIntrinsics::ID iid = method->intrinsic_id();
1120     intptr_t start = (intptr_t)__ pc();
1121     int vep_offset = ((intptr_t)__ pc()) - start;
1122     gen_special_dispatch(masm,
1123                          method,
1124                          in_sig_bt,
1125                          in_regs);
1126     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1127     __ flush();
1128     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1129     return nmethod::new_native_nmethod(method,
1130                                        compile_id,
1131                                        masm->code(),
1132                                        vep_offset,
1133                                        frame_complete,
1134                                        stack_slots / VMRegImpl::slots_per_word,
1135                                        in_ByteSize(-1),
1136                                        in_ByteSize(-1),
1137                                        (OopMapSet*)NULL);
1138   }
1139   // Arguments for the JNI method include JNIEnv and, for static methods, the Class
1140 
1141   // Usage of Rtemp should be OK since it is scratched by the native call
1142 
1143   bool is_static = method->is_static();
1144 
1145   const int total_in_args = method->size_of_parameters();
1146   int total_c_args = total_in_args + 1;
1147   if (is_static) {
1148     total_c_args++;
1149   }
1150 
1151   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1152   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1153 
1154   int argc = 0;
1155   out_sig_bt[argc++] = T_ADDRESS;
1156   if (is_static) {
1157     out_sig_bt[argc++] = T_OBJECT;
1158   }
1159 
1160   int i;
1161   for (i = 0; i < total_in_args; i++) {
1162     out_sig_bt[argc++] = in_sig_bt[i];
1163   }
1164 
1165   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1166   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1167   // Since object arguments need to be wrapped, we must preserve space
1168   // for those object arguments which come in registers (GPR_PARAMS maximum)
1169   // plus one more slot for Klass handle (for static methods)
1170   int oop_handle_offset = stack_slots;
1171   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
1172 
1173   // Plus a lock if needed
1174   int lock_slot_offset = 0;
1175   if (method->is_synchronized()) {
1176     lock_slot_offset = stack_slots;
1177     assert(sizeof(BasicLock) == wordSize, "adjust this code");
1178     stack_slots += VMRegImpl::slots_per_word;
1179   }
1180 
1181   // Space to save return address and FP
1182   stack_slots += 2 * VMRegImpl::slots_per_word;
1183 
1184   // Calculate the final stack size taking account of alignment
1185   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
1186   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1187   int lock_slot_fp_offset = stack_size - 2 * wordSize -
1188     lock_slot_offset * VMRegImpl::stack_slot_size;
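
  // Rough sizing example (illustration only; assumes 32-bit with
  // GPR_PARAMS == 4, VMRegImpl::slots_per_word == 1, stack_slot_size == 4 and
  // out_preserve_stack_slots() == 0): a non-static, non-synchronized method
  // taking two ints has out_arg_slots == 0, so stack_slots is
  // 0 (outgoing) + 5 (oop handles) + 0 (no lock) + 2 (FP/LR) = 7,
  // rounded up to 8 slots, i.e. a 32-byte frame.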
1189 
1190   // Unverified entry point
1191   address start = __ pc();
1192 
1193   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1194   const Register receiver = R0; // see receiverOpr()
1195   __ load_klass(Rtemp, receiver);
1196   __ cmp(Rtemp, Ricklass);
1197   Label verified;
1198 
1199   __ b(verified, eq); // jump over alignment no-ops too
1200   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1201   __ align(CodeEntryAlignment);
1202 
1203   // Verified entry point
1204   __ bind(verified);
1205   int vep_offset = __ pc() - start;
1206 
1207 #ifdef AARCH64
1208   // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1209   __ nop();
1210 #endif // AARCH64
1211 
1212   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
1213     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
1214     // instead of doing a full VM transition once it's been computed.
1215     Label slow_case;
1216     const Register obj_reg = R0;
1217 
1218     // Unlike Object.hashCode, System.identityHashCode is a static method and
1219     // gets the object as an argument instead of the receiver.
1220     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1221       assert(method->is_static(), "method should be static");
1222       // return 0 for null reference input, return val = R0 = obj_reg = 0
1223 #ifdef AARCH64
1224       Label Continue;
1225       __ cbnz(obj_reg, Continue);
1226       __ ret();
1227       __ bind(Continue);
1228 #else
1229       __ cmp(obj_reg, 0);
1230       __ bx(LR, eq);
1231 #endif
1232     }
1233 
1234     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1235 
1236     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1237     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1238 
1239     if (UseBiasedLocking) {
1240       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1241       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1242     }
1243 
1244 #ifdef AARCH64
1245     __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1246     __ b(slow_case, eq);
1247     __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1248     __ ret();
1249 #else
1250     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1251     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1252     __ bx(LR, ne);
1253 #endif // AARCH64
1254 
1255     __ bind(slow_case);
1256   }
1257 
1258   // Bang stack pages
1259   __ arm_stack_overflow_check(stack_size, Rtemp);
1260 
1261   // Setup frame linkage
1262   __ raw_push(FP, LR);
1263   __ mov(FP, SP);
1264   __ sub_slow(SP, SP, stack_size - 2*wordSize);
1265 
1266   int frame_complete = __ pc() - start;
1267 
1268   OopMapSet* oop_maps = new OopMapSet();
1269   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1270   const int extra_args = is_static ? 2 : 1;
1271   int receiver_offset = -1;
1272   int fp_regs_in_arguments = 0;
1273 
1274   for (i = total_in_args; --i >= 0; ) {
1275     switch (in_sig_bt[i]) {
1276     case T_ARRAY:
1277     case T_OBJECT: {
1278       VMReg src = in_regs[i].first();
1279       VMReg dst = out_regs[i + extra_args].first();
1280       if (src->is_stack()) {
1281         assert(dst->is_stack(), "must be");
1282         assert(i != 0, "Incoming receiver is always in a register");
1283         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1284         __ cmp(Rtemp, 0);
1285 #ifdef AARCH64
1286         __ add(Rtemp, FP, reg2offset_in(src));
1287         __ csel(Rtemp, ZR, Rtemp, eq);
1288 #else
1289         __ add(Rtemp, FP, reg2offset_in(src), ne);
1290 #endif // AARCH64
1291         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1292         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1293         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1294       } else {
1295         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1296         __ str(src->as_Register(), Address(SP, offset));
1297         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1298         if ((i == 0) && (!is_static)) {
1299           receiver_offset = offset;
1300         }
1301         oop_handle_offset += VMRegImpl::slots_per_word;
1302 
1303 #ifdef AARCH64
1304         __ cmp(src->as_Register(), 0);
1305         __ add(Rtemp, SP, offset);
1306         __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1307         if (dst->is_stack()) {
1308           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1309         }
1310 #else
1311         if (dst->is_stack()) {
1312           __ movs(Rtemp, src->as_Register());
1313           __ add(Rtemp, SP, offset, ne);
1314           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1315         } else {
1316           __ movs(dst->as_Register(), src->as_Register());
1317           __ add(dst->as_Register(), SP, offset, ne);
1318         }
1319 #endif // AARCH64
1320       }
1321     }
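      // falls through to the T_VOID case below, which just breaks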
1322 
1323     case T_VOID:
1324       break;
1325 
1326 #ifdef AARCH64
1327     case T_FLOAT:
1328     case T_DOUBLE: {
1329       VMReg src = in_regs[i].first();
1330       VMReg dst = out_regs[i + extra_args].first();
1331       if (src->is_stack()) {
1332         assert(dst->is_stack(), "must be");
1333         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1334         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1335       } else {
1336         assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1337         assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1338         fp_regs_in_arguments++;
1339       }
1340       break;
1341     }
1342 #else // AARCH64
1343 
1344 #ifdef __SOFTFP__
1345     case T_DOUBLE:
1346 #endif
1347     case T_LONG: {
1348       VMReg src_1 = in_regs[i].first();
1349       VMReg src_2 = in_regs[i].second();
1350       VMReg dst_1 = out_regs[i + extra_args].first();
1351       VMReg dst_2 = out_regs[i + extra_args].second();
1352 #if (ALIGN_WIDE_ARGUMENTS == 0)
1353       // The C convention can mix a register and a stack slot for a
1354       // 64-bit native argument.
1355 
1356       // Note: following code should work independently of whether
1357       // the Java calling convention follows C convention or whether
1358       // it aligns 64-bit values.
1359       if (dst_2->is_Register()) {
1360         if (src_1->as_Register() != dst_1->as_Register()) {
1361           assert(src_1->as_Register() != dst_2->as_Register() &&
1362                  src_2->as_Register() != dst_2->as_Register(), "must be");
1363           __ mov(dst_2->as_Register(), src_2->as_Register());
1364           __ mov(dst_1->as_Register(), src_1->as_Register());
1365         } else {
1366           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1367         }
1368       } else if (src_2->is_Register()) {
1369         if (dst_1->is_Register()) {
1370           // dst mixes a register and a stack slot
1371           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1372           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
1373           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1374           __ mov(dst_1->as_Register(), src_1->as_Register());
1375         } else {
1376           // registers to stack slots
1377           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1378           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1379           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1380         }
1381       } else if (src_1->is_Register()) {
1382         if (dst_1->is_Register()) {
1383           // src and dst must be R3 + stack slot
1384           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
1385           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
1386           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
1387         } else {
1388           // <R3,stack> -> <stack,stack>
1389           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
1390           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1391           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1392           __ str(LR, Address(SP, reg2offset_out(dst_2)));
1393         }
1394       } else {
1395         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1396         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1397         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1398         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1399         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1400       }
1401 #else // ALIGN_WIDE_ARGUMENTS
1402       if (src_1->is_stack()) {
1403         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1404         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1405         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1406         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1407         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1408       } else if (dst_1->is_stack()) {
1409         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1410         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1411         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1412       } else if (src_1->as_Register() == dst_1->as_Register()) {
1413         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1414       } else {
1415         assert(src_1->as_Register() != dst_2->as_Register() &&
1416                src_2->as_Register() != dst_2->as_Register(), "must be");
1417         __ mov(dst_2->as_Register(), src_2->as_Register());
1418         __ mov(dst_1->as_Register(), src_1->as_Register());
1419       }
1420 #endif // ALIGN_WIDE_ARGUMENTS
1421       break;
1422     }
1423 
1424 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1425     case T_FLOAT: {
1426       VMReg src = in_regs[i].first();
1427       VMReg dst = out_regs[i + extra_args].first();
1428       if (src->is_stack()) {
1429         assert(dst->is_stack(), "must be");
1430         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1431         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1432       } else if (dst->is_stack()) {
1433         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1434       } else {
1435         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1436         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1437       }
1438       break;
1439     }
1440 
1441     case T_DOUBLE: {
1442       VMReg src_1 = in_regs[i].first();
1443       VMReg src_2 = in_regs[i].second();
1444       VMReg dst_1 = out_regs[i + extra_args].first();
1445       VMReg dst_2 = out_regs[i + extra_args].second();
1446       if (src_1->is_stack()) {
1447         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1448         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1449         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1450         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1451         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1452       } else if (dst_1->is_stack()) {
1453         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1454         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1455 #if (ALIGN_WIDE_ARGUMENTS == 0)
1456       } else if (dst_2->is_stack()) {
1457         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1458         // double register must go into R3 + one stack slot
1459         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1460         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1461 #endif
1462       } else {
1463         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1464         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1465       }
1466       break;
1467     }
1468 #endif // !__SOFTFP__ && !__ABI_HARD__
1469 
1470 #ifdef __ABI_HARD__
1471     case T_FLOAT: {
1472       VMReg src = in_regs[i].first();
1473       VMReg dst = out_regs[i + extra_args].first();
1474       if (src->is_stack()) {
1475         if (dst->is_stack()) {
1476           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1477           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1478         } else {
1479           // C2 Java calling convention does not populate S14 and S15, therefore
1480           // those need to be loaded from stack here
1481           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1482           fp_regs_in_arguments++;
1483         }
1484       } else {
1485         assert(src->is_FloatRegister(), "must be");
1486         fp_regs_in_arguments++;
1487       }
1488       break;
1489     }
1490     case T_DOUBLE: {
1491       VMReg src_1 = in_regs[i].first();
1492       VMReg src_2 = in_regs[i].second();
1493       VMReg dst_1 = out_regs[i + extra_args].first();
1494       VMReg dst_2 = out_regs[i + extra_args].second();
1495       if (src_1->is_stack()) {
1496         if (dst_1->is_stack()) {
1497           assert(dst_2->is_stack(), "must be");
1498           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1499           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1500           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1501           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1502         } else {
1503           // C2 Java calling convention does not populate S14 and S15, therefore
1504           // those need to be loaded from stack here
1505           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1506           fp_regs_in_arguments += 2;
1507         }
1508       } else {
1509         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1510         fp_regs_in_arguments += 2;
1511       }
1512       break;
1513     }
1514 #endif // __ABI_HARD__
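    // Note: fp_regs_in_arguments, incremented in the __ABI_HARD__ cases above,
    // roughly counts the single-precision VFP argument registers in use
    // (1 per float, 2 per double). It is used later to save and restore
    // exactly those registers around the slow-path locking call
    // (push_param_registers / pop_param_registers).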
1515 #endif // AARCH64
1516 
1517     default: {
1518       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1519       VMReg src = in_regs[i].first();
1520       VMReg dst = out_regs[i + extra_args].first();
1521       if (src->is_stack()) {
1522         assert(dst->is_stack(), "must be");
1523         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1524         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1525       } else if (dst->is_stack()) {
1526         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1527       } else {
1528         assert(src->is_Register() && dst->is_Register(), "must be");
1529         __ mov(dst->as_Register(), src->as_Register());
1530       }
1531     }
1532     }
1533   }
1534 
1535   // Get Klass mirror
1536   int klass_offset = -1;
1537   if (is_static) {
1538     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1539     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1540     __ add(c_rarg1, SP, klass_offset);
1541     __ str(Rtemp, Address(SP, klass_offset));
1542     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1543   }
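  // Note on the is_static block above: for a static native the second C
  // argument (c_rarg1) is a jclass rather than the receiver. The class mirror
  // oop is stored into the frame and c_rarg1 is set to the address of that
  // stack slot, which serves as the local JNI handle; the slot is also
  // registered in the oop map so the GC can update it.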
1544 
1545   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1546   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1547   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1548   oop_maps->add_gc_map(pc_offset, map);
1549 
1550 #ifndef AARCH64
1551   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1552   __ membar(MacroAssembler::StoreStore, Rtemp);
1553 #endif // !AARCH64
1554 
1555   // RedefineClasses() tracing support for obsolete method entry
1556   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1557 #ifdef AARCH64
1558     __ NOT_TESTED();
1559 #endif
1560     __ save_caller_save_registers();
1561     __ mov(R0, Rthread);
1562     __ mov_metadata(R1, method());
1563     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1564     __ restore_caller_save_registers();
1565   }
1566 
1567   const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1568   const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1569   const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1570   const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
1571 
1572   Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
1573   if (method->is_synchronized()) {
1574     // The first argument is a handle to sync object (a class or an instance)
1575     __ ldr(sync_obj, Address(R1));
1576     // Remember the handle for the unlocking code
1577     __ mov(sync_handle, R1);
1578 
1579     if (UseBiasedLocking) {
1580       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1581     }
1582 
1583     const Register mark = tmp;
1584 #ifdef AARCH64
1585     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1586     assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1587 
1588     __ ldr(mark, sync_obj);
1589 
1590     // Test if object is already locked
1591     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1592     __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1593 
1594     // Check for recursive lock
1595     // See comments in InterpreterMacroAssembler::lock_object for an
1596     // explanation of the fast recursive locking check.
1597     __ mov(Rtemp, SP);
1598     __ sub(Rtemp, mark, Rtemp);
1599     intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1600     Assembler::LogicalImmediate imm(mask, false);
1601     __ ands(Rtemp, Rtemp, imm);
1602     __ b(slow_lock, ne);
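    // Informally, the single 'ands' above is equivalent to checking
    //   ((mark - SP) & 3) == 0 && (uintptr_t)(mark - SP) < os::vm_page_size()
    // i.e. the displaced header lies at or above SP within one page and has
    // its two low bits clear, which identifies a recursive stack lock.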
1603 
1604     // Recursive locking: store 0 into a lock record
1605     __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1606     __ b(lock_done);
1607 
1608     __ bind(fast_lock);
1609     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1610 
1611     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1612 #else
1613     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1614     // That is acceptable because either the CAS or the slow-case path is taken in that case.
1615 
1616     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1617     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1618     __ tst(mark, markOopDesc::unlocked_value);
1619     __ b(fast_lock, ne);
1620 
1621     // Check for recursive lock
1622     // See comments in InterpreterMacroAssembler::lock_object for an
1623     // explanation of the fast recursive locking check.
1624     // Check independently the low bits and the distance to SP
1625     // -1- test low 2 bits
1626     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1627     // -2- test (hdr - SP) if the low two bits are 0
1628     __ sub(Rtemp, mark, SP, eq);
1629     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1630     // If still 'eq' then recursive locking OK: set displaced header to 0
1631     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1632     __ b(lock_done, eq);
1633     __ b(slow_lock);
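    // Informal C-style sketch of the recursive-lock test implemented by the
    // two conditional steps above (see InterpreterMacroAssembler::lock_object
    // for the authoritative version):
    //   if ((mark & 3) == 0 && (uintptr_t)(mark - SP) < os::vm_page_size()) {
    //     displaced_header = 0;   // recursive lock, done
    //   } else {
    //     goto slow_lock;
    //   }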
1634 
1635     __ bind(fast_lock);
1636     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1637 
1638     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1639 #endif // AARCH64
1640 
1641     __ bind(lock_done);
1642   }
1643 
1644   // Get JNIEnv*
1645   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1646 
1647   // Perform thread state transition
1648   __ mov(Rtemp, _thread_in_native);
1649 #ifdef AARCH64
1650   // stlr instruction is used to force all preceding writes to be observed prior to thread state change
1651   __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1652   __ stlr_w(Rtemp, Rtemp2);
1653 #else
1654   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1655 #endif // AARCH64
1656 
1657   // Finally, call the native method
1658   __ call(method->native_function());
1659 
1660   // Set FPSCR/FPCR to a known state
1661   if (AlwaysRestoreFPU) {
1662     __ restore_default_fp_mode();
1663   }
1664 
1665   // Do a safepoint check while thread is in transition state
1666   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1667   Label call_safepoint_runtime, return_to_java;
1668   __ mov(Rtemp, _thread_in_native_trans);
1669   __ ldr_literal(R2, safepoint_state);
1670   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1671 
1672   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1673   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1674 
1675   __ ldr_s32(R2, Address(R2));
1676   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1677   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1678   __ cond_cmp(R3, 0, eq);
1679   __ b(call_safepoint_runtime, ne);
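  // Roughly, the cmp/cond_cmp/b sequence above implements:
  //   if (SafepointSynchronize::_state != _not_synchronized || suspend_flags != 0)
  //     goto call_safepoint_runtime;
  // (informal sketch; R2 holds the safepoint state, R3 the thread's suspend flags)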
1680   __ bind(return_to_java);
1681 
1682   // Perform thread state transition and reguard stack yellow pages if needed
1683   Label reguard, reguard_done;
1684   __ mov(Rtemp, _thread_in_Java);
1685   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1686   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1687 
1688   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1689   __ b(reguard, eq);
1690   __ bind(reguard_done);
1691 
1692   Label slow_unlock, unlock_done, retry;
1693   if (method->is_synchronized()) {
1694     __ ldr(sync_obj, Address(sync_handle));
1695 
1696     if (UseBiasedLocking) {
1697       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1698       // disp_hdr may not have been saved on entry with biased locking
1699       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1700     }
1701 
1702     // See C1_MacroAssembler::unlock_object() for more comments
1703     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1704     __ cbz(R2, unlock_done);
1705 
1706     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1707 
1708     __ bind(unlock_done);
1709   }
1710 
1711   // Reset last_Java_frame and zero the top of the active JNI handle block
1712   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1713   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1714 
1715 #ifdef AARCH64
1716   __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1717   if (CheckJNICalls) {
1718     __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1719   }
1720 
1721 
1722   switch (ret_type) {
1723   case T_BOOLEAN:
1724     __ tst(R0, 0xff);
1725     __ cset(R0, ne);
1726     break;
1727   case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1728   case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1729   case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1730   case T_INT    : // fall through
1731   case T_LONG   : // fall through
1732   case T_VOID   : // fall through
1733   case T_FLOAT  : // fall through
1734   case T_DOUBLE : /* nothing to do */          break;
1735   case T_OBJECT : // fall through
1736   case T_ARRAY  : break; // See JNIHandles::resolve below
1737   default:
1738     ShouldNotReachHere();
1739   }
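  // For reference, the T_BOOLEAN case above canonicalizes the native result:
  //   R0 = ((R0 & 0xff) != 0) ? 1 : 0;
  // (informal sketch of the tst/cset pair) so callers always see 0 or 1.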
1740 #else
1741   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1742   if (CheckJNICalls) {
1743     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1744   }
1745 #endif // AARCH64
1746 
1747   // Unbox the oop result, i.e. resolve the jobject handle in R0 via JNIHandles::resolve.
1748   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1749     __ resolve_jobject(R0,      // value
1750                        Rtemp,   // tmp1
1751                        R1_tmp); // tmp2
1752   }
1753 
1754   // Any exception pending?
1755   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1756   __ mov(SP, FP);
1757 
1758 #ifdef AARCH64
1759   Label except;
1760   __ cbnz(Rtemp, except);
1761   __ raw_pop(FP, LR);
1762   __ ret();
1763 
1764   __ bind(except);
1765   // Pop the frame and forward the exception. Rexception_pc contains return address.
1766   __ raw_pop(FP, Rexception_pc);
1767 #else
1768   __ cmp(Rtemp, 0);
1769   // Pop the frame and return if no exception pending
1770   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1771   // Pop the frame and forward the exception. Rexception_pc contains return address.
1772   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1773   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1774 #endif // AARCH64
1775   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1776 
1777   // Safepoint operation and/or pending suspend request is in progress.
1778   // Save the return values and call the runtime function by hand.
1779   __ bind(call_safepoint_runtime);
1780   push_result_registers(masm, ret_type);
1781   __ mov(R0, Rthread);
1782   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1783   pop_result_registers(masm, ret_type);
1784   __ b(return_to_java);
1785 
1786   __ bind_literal(safepoint_state);
1787 
1788   // Reguard stack pages. Save native results around a call to C runtime.
1789   __ bind(reguard);
1790   push_result_registers(masm, ret_type);
1791   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1792   pop_result_registers(masm, ret_type);
1793   __ b(reguard_done);
1794 
1795   if (method->is_synchronized()) {
1796     // Locking slow case
1797     if (UseBiasedLocking) {
1798       __ bind(slow_lock_biased);
1799       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1800     }
1801 
1802     __ bind(slow_lock);
1803 
1804     push_param_registers(masm, fp_regs_in_arguments);
1805 
1806     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1807     __ mov(R0, sync_obj);
1808     __ mov(R1, disp_hdr);
1809     __ mov(R2, Rthread);
1810     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1811 
1812     pop_param_registers(masm, fp_regs_in_arguments);
1813 
1814     __ b(lock_done);
1815 
1816     // Unlocking slow case
1817     __ bind(slow_unlock);
1818 
1819     push_result_registers(masm, ret_type);
1820 
1821     // Clear pending exception before reentering VM.
1822     // Can store the oop in register since it is a leaf call.
1823     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1824     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1825     Register zero = __ zero_register(Rtemp);
1826     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1827     __ mov(R0, sync_obj);
1828     __ mov(R1, disp_hdr);
1829     __ mov(R2, Rthread);
1830     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1831     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1832 
1833     pop_result_registers(masm, ret_type);
1834 
1835     __ b(unlock_done);
1836   }
1837 
1838   __ flush();
1839   return nmethod::new_native_nmethod(method,
1840                                      compile_id,
1841                                      masm->code(),
1842                                      vep_offset,
1843                                      frame_complete,
1844                                      stack_slots / VMRegImpl::slots_per_word,
1845                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1846                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1847                                      oop_maps);
1848 }
1849 
1850 // This function returns the adjustment (in number of words) applied to a c2i
1851 // adapter activation for use during deoptimization.
1852 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1853   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1854 #ifdef AARCH64
1855   extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1856 #endif // AARCH64
1857   return extra_locals_size;
1858 }
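// Informal example: with callee_parameters == 2, callee_locals == 5 and
// Interpreter::stackElementWords == 1 the adjustment is 3 words; on AArch64
// it is then rounded up to the 2-word (16-byte) stack alignment, giving 4.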
1859 
1860 
1861 uint SharedRuntime::out_preserve_stack_slots() {
1862   return 0;
1863 }
1864 
1865 
1866 //------------------------------generate_deopt_blob----------------------------
1867 void SharedRuntime::generate_deopt_blob() {
1868   ResourceMark rm;
1869 #ifdef AARCH64
1870   CodeBuffer buffer("deopt_blob", 1024+256, 1);
1871 #else
1872   CodeBuffer buffer("deopt_blob", 1024, 1024);
1873 #endif
1874   int frame_size_in_words;
1875   OopMapSet* oop_maps;
1876   int reexecute_offset;
1877   int exception_in_tls_offset;
1878   int exception_offset;
1879 
1880   MacroAssembler* masm = new MacroAssembler(&buffer);
1881   Label cont;
1882   const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1883   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1884   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1885   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1886 
1887   address start = __ pc();
1888 
1889   oop_maps = new OopMapSet();
1890   // LR saved by caller (can be live in c2 method)
1891 
1892   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1893   // not possible to call the deopt blob from the nmethod and pass the
1894   // address of the deopt handler of the nmethod in LR. What happens
1895   // now is that the caller of the deopt blob pushes the current
1896   // address so the deopt blob doesn't have to do it. This way LR can
1897   // be preserved, contains the live value from the nmethod and is
1898   // saved at R14/R30_offset here.
1899   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1900   __ mov(Rkind, Deoptimization::Unpack_deopt);
1901   __ b(cont);
1902 
1903   exception_offset = __ pc() - start;
1904 
1905   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1906   // exception_in_tls_offset entry point.
1907   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1908   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1909   // Force return value to NULL to avoid confusing the escape analysis
1910   // logic. Everything is dead here anyway.
1911   __ mov(R0, 0);
1912 
1913   exception_in_tls_offset = __ pc() - start;
1914 
1915   // Exception data is in JavaThread structure
1916   // Patch the return address of the current frame
1917   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1918   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1919   {
1920     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1921     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1922   }
1923   __ mov(Rkind, Deoptimization::Unpack_exception);
1924   __ b(cont);
1925 
1926   reexecute_offset = __ pc() - start;
1927 
1928   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1929   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1930 
1931   // Calculate UnrollBlock and save the result in Rublock
1932   __ bind(cont);
1933   __ mov(R0, Rthread);
1934   __ mov(R1, Rkind);
1935 
1936   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1937   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1938   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1939   if (pc_offset == -1) {
1940     pc_offset = __ offset();
1941   }
1942   oop_maps->add_gc_map(pc_offset, map);
1943   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1944 
1945   __ mov(Rublock, R0);
1946 
1947   // Reload Rkind from the UnrollBlock (might have changed)
1948   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1949   Label noException;
1950   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1951   __ b(noException, ne);
1952   // handle exception case
1953 #ifdef ASSERT
1954   // assert that exception_pc is zero in tls
1955   { Label L;
1956     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1957     __ cbz(Rexception_pc, L);
1958     __ stop("exception pc should be null");
1959     __ bind(L);
1960   }
1961 #endif
1962   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1963   __ verify_oop(Rexception_obj);
1964   {
1965     const Register Rzero = __ zero_register(Rtemp);
1966     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1967   }
1968 
1969   __ bind(noException);
1970 
1971   // This frame is going away.  Fetch return value, so we can move it to
1972   // a new frame.
1973   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1974 #ifndef AARCH64
1975   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1976 #endif // !AARCH64
1977 #ifndef __SOFTFP__
1978   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1979 #endif
1980   // pop frame
1981   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1982 
1983   // Set initial stack state before pushing interpreter frames
1984   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1985   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1986   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1987 
1988 #ifdef AARCH64
1989   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1990   // They are needed for correct stack walking during stack overflow handling.
1991   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1992   __ sub(Rtemp, Rtemp, 2*wordSize);
1993   __ add(SP, SP, Rtemp, ex_uxtx);
1994   __ raw_pop(FP, LR);
1995 
1996 #ifdef ASSERT
1997   { Label L;
1998     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1999     __ cmp(FP, Rtemp);
2000     __ b(L, eq);
2001     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2002     __ bind(L);
2003   }
2004   { Label L;
2005     __ ldr(Rtemp, Address(R2));
2006     __ cmp(LR, Rtemp);
2007     __ b(L, eq);
2008     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2009     __ bind(L);
2010   }
2011 #endif // ASSERT
2012 
2013 #else
2014   __ add(SP, SP, Rtemp);
2015 #endif // AARCH64
2016 
2017 #ifdef ASSERT
2018   // Compilers generate code that bangs the stack by as much as the
2019   // interpreter would need. So this stack banging should never
2020   // trigger a fault. Verify that it does not on non-product builds.
2021   // See if there is enough stack to push deoptimized frames
2022   if (UseStackBanging) {
2023 #ifndef AARCH64
2024     // The compiled method that we are deoptimizing was popped from the stack.
2025     // If the stack bang results in a stack overflow, we don't return to the
2026     // method that is being deoptimized. The stack overflow exception is
2027     // propagated to the caller of the deoptimized method. Need to get the pc
2028     // from the caller in LR and restore FP.
2029     __ ldr(LR, Address(R2, 0));
2030     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2031 #endif // !AARCH64
2032     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2033     __ arm_stack_overflow_check(R8, Rtemp);
2034   }
2035 #endif
2036   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2037 
2038 #ifndef AARCH64
2039   // Pick up the initial fp we should save
2040   // XXX Note: was ldr(FP, Address(FP));
2041 
2042   // The compiler no longer uses FP as a frame pointer for compiled
2043   // code: it can be used by the C2 register allocator, or to
2044   // remember the original SP for JSR292 call sites.
2045 
2046   // Hence, ldr(FP, Address(FP)) is probably not correct. As on x86,
2047   // Deoptimization::fetch_unroll_info computes the right FP value and
2048   // stores it in Rublock.initial_info, and that value is reloaded here.
2049   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2050 #endif // !AARCH64
2051 
2052   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2053   __ mov(Rsender, SP);
2054 #ifdef AARCH64
2055   __ sub(SP, SP, Rtemp, ex_uxtx);
2056 #else
2057   __ sub(SP, SP, Rtemp);
2058 #endif // AARCH64
2059 
2060   // Push interpreter frames in a loop
2061   Label loop;
2062   __ bind(loop);
2063   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2064   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2065 
2066   __ raw_push(FP, LR);                                     // create new frame
2067   __ mov(FP, SP);
2068   __ sub(Rtemp, Rtemp, 2*wordSize);
2069 
2070 #ifdef AARCH64
2071   __ sub(SP, SP, Rtemp, ex_uxtx);
2072 #else
2073   __ sub(SP, SP, Rtemp);
2074 #endif // AARCH64
2075 
2076   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2077 #ifdef AARCH64
2078   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2079 #else
2080   __ mov(LR, 0);
2081   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2082 #endif // AARCH64
2083 
2084   __ subs(R8, R8, 1);                               // decrement counter
2085   __ mov(Rsender, SP);
2086   __ b(loop, ne);
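  // Each iteration above builds a skeletal interpreter frame, roughly:
  //   raw_push(FP, LR);                        // saved FP + frame pc from the unroll block
  //   FP = SP;  SP -= frame_size_in_bytes - 2 * wordSize;
  //   interpreter_frame_sender_sp = Rsender;   // last_sp / stack_top slot = 0
  // The frame's locals and expression stack are filled in later by
  // Deoptimization::unpack_frames.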
2087 
2088   // Re-push self-frame
2089   __ ldr(LR, Address(R2));
2090   __ raw_push(FP, LR);
2091   __ mov(FP, SP);
2092   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
2093 
2094   // Restore frame locals after moving the frame
2095   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2096 #ifndef AARCH64
2097   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2098 #endif // !AARCH64
2099 
2100 #ifndef __SOFTFP__
2101   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2102 #endif // !__SOFTFP__
2103 
2104 #ifndef AARCH64
2105 #ifdef ASSERT
2106   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2107   { Label L;
2108     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2109     __ cmp_32(Rkind, Rtemp);
2110     __ b(L, eq);
2111     __ stop("Rkind was overwritten");
2112     __ bind(L);
2113   }
2114 #endif
2115 #endif
2116 
2117   // Call unpack_frames with proper arguments
2118   __ mov(R0, Rthread);
2119   __ mov(R1, Rkind);
2120 
2121   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2122   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2123   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2124   if (pc_offset == -1) {
2125     pc_offset = __ offset();
2126   }
2127   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2128   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2129 
2130   // Collect return values, pop self-frame and jump to interpreter
2131   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2132 #ifndef AARCH64
2133   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2134 #endif // !AARCH64
2135   // Interpreter floats controlled by __SOFTFP__, but compiler
2136   // float return value registers controlled by __ABI_HARD__
2137   // This matters for vfp-sflt builds.
2138 #ifndef __SOFTFP__
2139   // Interpreter hard float
2140 #ifdef __ABI_HARD__
2141   // Compiler float return value in FP registers
2142   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2143 #else
2144   // Compiler float return value in integer registers,
2145   // copy to D0 for interpreter (S0 <-- R0)
2146   __ fmdrr(D0_tos, R0, R1);
2147 #endif
2148 #endif // !__SOFTFP__
2149   __ mov(SP, FP);
2150 
2151 #ifdef AARCH64
2152   __ raw_pop(FP, LR);
2153   __ ret();
2154 #else
2155   __ pop(RegisterSet(FP) | RegisterSet(PC));
2156 #endif // AARCH64
2157 
2158   __ flush();
2159 
2160   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2161                                            reexecute_offset, frame_size_in_words);
2162   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2163 }
2164 
2165 #ifdef COMPILER2
2166 
2167 //------------------------------generate_uncommon_trap_blob--------------------
2168 // Ought to generate an ideal graph & compile, but here's some hand-written
2169 // ARM assembly instead.
2170 void SharedRuntime::generate_uncommon_trap_blob() {
2171   // allocate space for the code
2172   ResourceMark rm;
2173 
2174   // setup code generation tools
2175   int pad = VerifyThread ? 512 : 0;
2176 #ifdef _LP64
2177   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2178 #else
2179   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2180   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2181   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2182 #endif
2183   // bypassed when code generation is useless
2184   MacroAssembler* masm               = new MacroAssembler(&buffer);
2185   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2186   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2187   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2188 
2189   //
2190   // This is the entry point for all traps the compiler takes when it thinks
2191   // it cannot handle further execution of compiled code. The frame is
2192   // deoptimized in these cases and converted into interpreter frames for
2193   // execution.
2194   // The steps taken by this blob are as follows:
2195   //   - push a fake "unpack_frame"
2196   //   - call the C routine Deoptimization::uncommon_trap (this function
2197   //     packs the current compiled frame into vframe arrays and returns
2198   //     information about the number and size of interpreter frames which
2199   //     are equivalent to the frame which is being deoptimized)
2200   //   - deallocate the "unpack_frame"
2201   //   - deallocate the deoptimization frame
2202   //   - in a loop using the information returned in the previous step
2203   //     push interpreter frames;
2204   //   - create a dummy "unpack_frame"
2205   //   - call the C routine: Deoptimization::unpack_frames (this function
2206   //     lays out values on the interpreter frame which was just created)
2207   //   - deallocate the dummy unpack_frame
2208   //   - return to the interpreter entry point
2209   //
2210   //  Refer to the following methods for more information:
2211   //   - Deoptimization::uncommon_trap
2212   //   - Deoptimization::unpack_frames
2213 
2214   // the unloaded class index is in R0 (first parameter to this blob)
2215 
2216   __ raw_push(FP, LR);
2217   __ set_last_Java_frame(SP, FP, false, Rtemp);
2218   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
2219   __ mov(R1, R0);
2220   __ mov(R0, Rthread);
2221   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
2222   __ mov(Rublock, R0);
2223   __ reset_last_Java_frame(Rtemp);
2224   __ raw_pop(FP, LR);
2225 
2226 #ifdef ASSERT
2227   { Label L;
2228     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2229     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2230     __ b(L, eq);
2231     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2232     __ bind(L);
2233   }
2234 #endif
2235 
2236 
2237   // Set initial stack state before pushing interpreter frames
2238   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2239   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2240   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2241 
2242 #ifdef AARCH64
2243   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2244   // They are needed for correct stack walking during stack overflow handling.
2245   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2246   __ sub(Rtemp, Rtemp, 2*wordSize);
2247   __ add(SP, SP, Rtemp, ex_uxtx);
2248   __ raw_pop(FP, LR);
2249 
2250 #ifdef ASSERT
2251   { Label L;
2252     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2253     __ cmp(FP, Rtemp);
2254     __ b(L, eq);
2255     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2256     __ bind(L);
2257   }
2258   { Label L;
2259     __ ldr(Rtemp, Address(R2));
2260     __ cmp(LR, Rtemp);
2261     __ b(L, eq);
2262     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2263     __ bind(L);
2264   }
2265 #endif // ASSERT
2266 
2267 #else
2268   __ add(SP, SP, Rtemp);
2269 #endif // AARCH64
2270 
2271   // See if there is enough stack to push deoptimized frames
2272 #ifdef ASSERT
2273   // Compilers generate code that bangs the stack by as much as the
2274   // interpreter would need. So this stack banging should never
2275   // trigger a fault. Verify that it does not on non-product builds.
2276   if (UseStackBanging) {
2277 #ifndef AARCH64
2278     // The compiled method that we are deoptimizing was popped from the stack.
2279     // If the stack bang results in a stack overflow, we don't return to the
2280     // method that is being deoptimized. The stack overflow exception is
2281     // propagated to the caller of the deoptimized method. Need to get the pc
2282     // from the caller in LR and restore FP.
2283     __ ldr(LR, Address(R2, 0));
2284     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2285 #endif // !AARCH64
2286     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2287     __ arm_stack_overflow_check(R8, Rtemp);
2288   }
2289 #endif
2290   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2291   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2292   __ mov(Rsender, SP);
2293 #ifdef AARCH64
2294   __ sub(SP, SP, Rtemp, ex_uxtx);
2295 #else
2296   __ sub(SP, SP, Rtemp);
2297 #endif
2298 #ifndef AARCH64
2299   //  __ ldr(FP, Address(FP));
2300   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2301 #endif // !AARCH64
2302 
2303   // Push interpreter frames in a loop
2304   Label loop;
2305   __ bind(loop);
2306   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2307   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2308 
2309   __ raw_push(FP, LR);                                     // create new frame
2310   __ mov(FP, SP);
2311   __ sub(Rtemp, Rtemp, 2*wordSize);
2312 
2313 #ifdef AARCH64
2314   __ sub(SP, SP, Rtemp, ex_uxtx);
2315 #else
2316   __ sub(SP, SP, Rtemp);
2317 #endif // AARCH64
2318 
2319   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2320 #ifdef AARCH64
2321   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2322 #else
2323   __ mov(LR, 0);
2324   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2325 #endif // AARCH64
2326   __ subs(R8, R8, 1);                               // decrement counter
2327   __ mov(Rsender, SP);
2328   __ b(loop, ne);
2329 
2330   // Re-push self-frame
2331   __ ldr(LR, Address(R2));
2332   __ raw_push(FP, LR);
2333   __ mov(FP, SP);
2334 
2335   // Call unpack_frames with proper arguments
2336   __ mov(R0, Rthread);
2337   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2338   __ set_last_Java_frame(SP, FP, false, Rtemp);
2339   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2340   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2341   __ reset_last_Java_frame(Rtemp);
2342 
2343   __ mov(SP, FP);
2344 #ifdef AARCH64
2345   __ raw_pop(FP, LR);
2346   __ ret();
2347 #else
2348   __ pop(RegisterSet(FP) | RegisterSet(PC));
2349 #endif
2350 
2351   masm->flush();
2352   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2353 }
2354 
2355 #endif // COMPILER2
2356 
2357 //------------------------------generate_handler_blob------
2358 //
2359 // Generate a special Compile2Runtime blob that saves all registers,
2360 // sets up an oopmap, and calls safepoint code to stop the compiled code for
2361 // a safepoint.
2362 //
2363 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2364   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2365 
2366   ResourceMark rm;
2367   CodeBuffer buffer("handler_blob", 256, 256);
2368   int frame_size_words;
2369   OopMapSet* oop_maps;
2370 
2371   bool cause_return = (poll_type == POLL_AT_RETURN);
2372 
2373   MacroAssembler* masm = new MacroAssembler(&buffer);
2374   address start = __ pc();
2375   oop_maps = new OopMapSet();
2376 
2377   if (!cause_return) {
2378 #ifdef AARCH64
2379     __ raw_push(LR, LR);
2380 #else
2381     __ sub(SP, SP, 4); // make room for LR which may still be live
2382                        // here if we are coming from a c2 method
2383 #endif // AARCH64
2384   }
2385 
2386   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2387   if (!cause_return) {
2388     // update saved PC with correct value
2389     // need 2 steps because LR can be live in c2 method
2390     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2391     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2392   }
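  // Note on the two steps above: the pc at which the thread was stopped was
  // stashed in JavaThread::saved_exception_pc by the polling-page signal
  // handler; copying it over the LR slot of the register save area lets the
  // stack walker and the oop map see that pc as this frame's return address.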
2393 
2394   __ mov(R0, Rthread);
2395   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2396   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2397   __ call(call_ptr);
2398   if (pc_offset == -1) {
2399     pc_offset = __ offset();
2400   }
2401   oop_maps->add_gc_map(pc_offset, map);
2402   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2403 
2404   // Check for pending exception
2405   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2406   __ cmp(Rtemp, 0);
2407 
2408 #ifdef AARCH64
2409   RegisterSaver::restore_live_registers(masm, cause_return);
2410   Register ret_addr = cause_return ? LR : Rtemp;
2411   if (!cause_return) {
2412     __ raw_pop(FP, ret_addr);
2413   }
2414 
2415   Label throw_exception;
2416   __ b(throw_exception, ne);
2417   __ br(ret_addr);
2418 
2419   __ bind(throw_exception);
2420   __ mov(Rexception_pc, ret_addr);
2421 #else // AARCH64
2422   if (!cause_return) {
2423     RegisterSaver::restore_live_registers(masm, false);
2424     __ pop(PC, eq);
2425     __ pop(Rexception_pc);
2426   } else {
2427     RegisterSaver::restore_live_registers(masm);
2428     __ bx(LR, eq);
2429     __ mov(Rexception_pc, LR);
2430   }
2431 #endif // AARCH64
2432 
2433   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2434 
2435   __ flush();
2436 
2437   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2438 }
2439 
2440 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2441   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2442 
2443   ResourceMark rm;
2444   CodeBuffer buffer(name, 1000, 512);
2445   int frame_size_words;
2446   OopMapSet *oop_maps;
2447   int frame_complete;
2448 
2449   MacroAssembler* masm = new MacroAssembler(&buffer);
2450   Label pending_exception;
2451 
2452   int start = __ offset();
2453 
2454   oop_maps = new OopMapSet();
2455   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
2456 
2457   frame_complete = __ offset();
2458 
2459   __ mov(R0, Rthread);
2460 
2461   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2462   assert(start == 0, "warning: start differs from code_begin");
2463   __ call(destination);
2464   if (pc_offset == -1) {
2465     pc_offset = __ offset();
2466   }
2467   oop_maps->add_gc_map(pc_offset, map);
2468   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2469 
2470   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
2471   __ cbnz(R1, pending_exception);
2472 
2473   // Overwrite saved register values
2474 
2475   // Place metadata result of VM call into Rmethod
2476   __ get_vm_result_2(R1, Rtemp);
2477   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
2478 
2479   // Place target address (VM call result) into Rtemp
2480   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
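  // (R0, the resolved entry point, is stored into the register save area so
  //  that restore_live_registers() below reloads it into Rtemp; the final
  //  jump(Rtemp) then enters the resolved code.)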
2481 
2482   RegisterSaver::restore_live_registers(masm);
2483   __ jump(Rtemp);
2484 
2485   __ bind(pending_exception);
2486 
2487   RegisterSaver::restore_live_registers(masm);
2488   const Register Rzero = __ zero_register(Rtemp);
2489   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
2490   __ mov(Rexception_pc, LR);
2491   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2492 
2493   __ flush();
2494 
2495   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2496 }