1 /*
   2  * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "assembler_arm.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "runtime/sharedRuntime.hpp"
  36 #include "runtime/vframeArray.hpp"
  37 #include "utilities/align.hpp"
  38 #include "vmreg_arm.inline.hpp"
  39 #ifdef COMPILER1
  40 #include "c1/c1_Runtime1.hpp"
  41 #endif
  42 #ifdef COMPILER2
  43 #include "opto/runtime.hpp"
  44 #endif
  45 
  46 #define __ masm->
  47 
  48 class RegisterSaver {
  49 public:
  50 
  51   // Special registers:
  52   //              32-bit ARM     64-bit ARM
  53   //  Rthread:       R10            R28
  54   //  LR:            R14            R30
  55 
  56   // Rthread is callee saved in the C ABI and never changed by compiled code:
  57   // no need to save it.
  58 
  // 2 slots for LR: one at LR_offset and another at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A C2 method uses LR as a standard register, so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it is live in the method we are coming from.
  64 
  65 #ifdef AARCH64
  66 
  67   //
  68   // On AArch64 registers save area has the following layout:
  69   //
  70   // |---------------------|
  71   // | return address (LR) |
  72   // | FP                  |
  73   // |---------------------|
  74   // | V31                 |
  75   // | ...                 |
  76   // | V0                  |
  77   // |---------------------|
  78   // | padding             |
  79   // | R30 (LR live value) |
  80   // |---------------------|
  81   // | R27                 |
  82   // | ...                 |
  83   // | R0                  |
  84   // |---------------------| <-- SP
  85   //
  86 
  87   enum RegisterLayout {
  88     number_of_saved_gprs = 28,
  89     number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
  90     words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
  91 
  92     R0_offset  = 0,
  93     R30_offset = R0_offset + number_of_saved_gprs,
  94     D0_offset  = R30_offset + 2,
  95     FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
  96     LR_offset  = FP_offset + 1,
  97 
  98     reg_save_size = LR_offset + 1,
  99   };
 100 
 101   static const int Rmethod_offset;
 102   static const int Rtemp_offset;
 103 
 104 #else
 105 
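  //
  // For reference (derived from the RegisterLayout enum below and the push
  // order in save_live_registers): on 32-bit ARM the register save area has
  // the following layout:
  //
  // |---------------------|
  // | LR                  |
  // | FP                  |
  // |---------------------|
  // | R14                 |
  // | R12                 |
  // | R11 or R7 (altFP)   |
  // | R9                  |
  // | ...                 |
  // | R0                  |
  // |---------------------|
  // | D31 ... D16         |  (if present: saved with VFPv3-D32, else reserved)
  // | D15 ... D0          |  (space only reserved, not saved, without VFP)
  // |---------------------| <-- SP
  //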
 106   enum RegisterLayout {
 107     fpu_save_size = FloatRegisterImpl::number_of_registers,
 108 #ifndef __SOFTFP__
 109     D0_offset = 0,
 110 #endif
 111     R0_offset = fpu_save_size,
 112     R1_offset,
 113     R2_offset,
 114     R3_offset,
 115     R4_offset,
 116     R5_offset,
 117     R6_offset,
 118 #if (FP_REG_NUM != 7)
 119     // if not saved as FP
 120     R7_offset,
 121 #endif
 122     R8_offset,
 123     R9_offset,
 124 #if (FP_REG_NUM != 11)
 125     // if not saved as FP
 126     R11_offset,
 127 #endif
 128     R12_offset,
 129     R14_offset,
 130     FP_offset,
 131     LR_offset,
 132     reg_save_size,
 133 
 134     Rmethod_offset = R9_offset,
 135     Rtemp_offset = R12_offset,
 136   };
 137 
  // All registers except Rthread (R10), FP (R7 or R11), SP and PC.
  // (altFP_7_11 is whichever of R7 and R11 is not used as FP.)
 140 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 141 
 142 #endif // AARCH64
 143 
  //  When LR may be live in the nmethod from which we are coming,
  //  lr_saved is true: the caller saves the return address before the
  //  call to save_live_registers, and LR still contains the live value.
 148 
 149   static OopMap* save_live_registers(MacroAssembler* masm,
 150                                      int* total_frame_words,
 151                                      bool lr_saved = false);
 152   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 153 
 154 };
 155 
 156 
 157 #ifdef AARCH64
 158 const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
 159 const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
 160 #endif // AARCH64
 161 
 162 
 163 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 164                                            int* total_frame_words,
 165                                            bool lr_saved) {
 166   *total_frame_words = reg_save_size;
 167 
 168   OopMapSet *oop_maps = new OopMapSet();
 169   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 170 
 171 #ifdef AARCH64
 172   assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
 173 
 174   if (lr_saved) {
 175     // LR was stashed here, so that jump could use it as a scratch reg
 176     __ ldr(LR, Address(SP, 0));
 177     // There are two words on the stack top:
 178     //  [SP + 0]: placeholder for FP
 179     //  [SP + wordSize]: saved return address
 180     __ str(FP, Address(SP, 0));
 181   } else {
 182     __ raw_push(FP, LR);
 183   }
 184 
 185   __ sub(SP, SP, (reg_save_size - 2) * wordSize);
 186 
 187   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 188     int offset = R0_offset + i;
 189     __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
 190     map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
 191     map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
 192   }
 193 
 194   __ str(R30, Address(SP, R30_offset * wordSize));
 195   map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
 196 
 197   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 198     int offset1 = D0_offset + i * words_per_fpr;
 199     int offset2 = offset1 + words_per_fpr;
 200     Address base(SP, offset1 * wordSize);
 201     if (words_per_fpr == 2) {
 202       // pair of "wide" quad vector registers
 203       __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 204     } else {
 205       // pair of double vector registers
 206       __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 207     }
 208     map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
 209     map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
 210   }
 211 #else
 212   if (lr_saved) {
 213     __ push(RegisterSet(FP));
 214   } else {
 215     __ push(RegisterSet(FP) | RegisterSet(LR));
 216   }
 217   __ push(SAVED_BASE_REGS);
 218   if (HaveVFP) {
 219     if (VM_Version::has_vfp3_32()) {
 220       __ fpush(FloatRegisterSet(D16, 16));
 221     } else {
 222       if (FloatRegisterImpl::number_of_registers > 32) {
 223         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 224         __ sub(SP, SP, 32 * wordSize);
 225       }
 226     }
 227     __ fpush(FloatRegisterSet(D0, 16));
 228   } else {
 229     __ sub(SP, SP, fpu_save_size * wordSize);
 230   }
 231 
 232   int i;
 233   int j=0;
 234   for (i = R0_offset; i <= R9_offset; i++) {
 235     if (j == FP_REG_NUM) {
 236       // skip the FP register, managed below.
 237       j++;
 238     }
 239     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 240     j++;
 241   }
 242   assert(j == R10->encoding(), "must be");
 243 #if (FP_REG_NUM != 11)
 244   // add R11, if not managed as FP
 245   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 246 #endif
 247   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 248   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 249   if (HaveVFP) {
 250     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 251       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 252       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 253     }
 254   }
 255 #endif // AARCH64
 256 
 257   return map;
 258 }
 259 
 260 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 261 #ifdef AARCH64
 262   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 263     __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
 264   }
 265 
 266   __ ldr(R30, Address(SP, R30_offset * wordSize));
 267 
 268   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 269     Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
 270     if (words_per_fpr == 2) {
 271       // pair of "wide" quad vector registers
 272       __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 273     } else {
 274       // pair of double vector registers
 275       __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 276     }
 277   }
 278 
 279   __ add(SP, SP, (reg_save_size - 2) * wordSize);
 280 
 281   if (restore_lr) {
 282     __ raw_pop(FP, LR);
 283   } else {
 284     __ ldr(FP, Address(SP, 0));
 285   }
 286 #else
 287   if (HaveVFP) {
 288     __ fpop(FloatRegisterSet(D0, 16));
 289     if (VM_Version::has_vfp3_32()) {
 290       __ fpop(FloatRegisterSet(D16, 16));
 291     } else {
 292       if (FloatRegisterImpl::number_of_registers > 32) {
 293         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 294         __ add(SP, SP, 32 * wordSize);
 295       }
 296     }
 297   } else {
 298     __ add(SP, SP, fpu_save_size * wordSize);
 299   }
 300   __ pop(SAVED_BASE_REGS);
 301   if (restore_lr) {
 302     __ pop(RegisterSet(FP) | RegisterSet(LR));
 303   } else {
 304     __ pop(RegisterSet(FP));
 305   }
 306 #endif // AARCH64
 307 }
 308 
 309 #ifdef AARCH64
 310 
 311 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 312   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 313     __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
 314   } else {
 315     __ raw_push(R0, ZR);
 316   }
 317 }
 318 
 319 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 320   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 321     __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
 322   } else {
 323     __ raw_pop(R0, ZR);
 324   }
 325 }
 326 
 327 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 328   __ raw_push(R0, R1);
 329   __ raw_push(R2, R3);
 330   __ raw_push(R4, R5);
 331   __ raw_push(R6, R7);
 332 
 333   assert(FPR_PARAMS == 8, "adjust this code");
 334   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 335 
 336   if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
 337   if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
 338   if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
 339   if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
 340 }
 341 
 342 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 343   assert(FPR_PARAMS == 8, "adjust this code");
 344   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 345 
 346   if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
 347   if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
 348   if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
 349   if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
 350 
 351   __ raw_pop(R6, R7);
 352   __ raw_pop(R4, R5);
 353   __ raw_pop(R2, R3);
 354   __ raw_pop(R0, R1);
 355 }
 356 
 357 #else // AARCH64
 358 
 359 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 360 #ifdef __ABI_HARD__
 361   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 362     __ sub(SP, SP, 8);
 363     __ fstd(D0, Address(SP));
 364     return;
 365   }
 366 #endif // __ABI_HARD__
 367   __ raw_push(R0, R1);
 368 }
 369 
 370 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 371 #ifdef __ABI_HARD__
 372   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 373     __ fldd(D0, Address(SP));
 374     __ add(SP, SP, 8);
 375     return;
 376   }
 377 #endif // __ABI_HARD__
 378   __ raw_pop(R0, R1);
 379 }
 380 
 381 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
  // The R1-R3 argument registers need to be preserved, but we push four
  // registers (R0-R3) to keep the stack 8-byte aligned
 383   __ push(RegisterSet(R0, R3));
 384 
  // Preserve FP arguments.
  // Likely not needed, as the locking code probably won't modify the volatile FP registers,
  // but there is no way to guarantee that.
 388   if (fp_regs_in_arguments) {
 389     // convert fp_regs_in_arguments to a number of double registers
 390     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 391     __ fpush_hardfp(FloatRegisterSet(D0, double_regs_num));
 392   }
 393 }
 394 
 395 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 396   if (fp_regs_in_arguments) {
 397     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 398     __ fpop_hardfp(FloatRegisterSet(D0, double_regs_num));
 399   }
 400   __ pop(RegisterSet(R0, R3));
 401 }
 402 
 403 #endif // AARCH64
 404 
 405 
// Is the vector's size (in bytes) bigger than the size saved by default?
// All vector registers are saved by default on ARM.
 408 bool SharedRuntime::is_wide_vector(int size) {
 409   return false;
 410 }
 411 
 412 size_t SharedRuntime::trampoline_size() {
 413   return 16;
 414 }
 415 
 416 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 417   InlinedAddress dest(destination);
 418   __ indirect_jump(dest, Rtemp);
 419   __ bind_literal(dest);
 420 }
 421 
 422 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 423                                         VMRegPair *regs,
 424                                         VMRegPair *regs2,
 425                                         int total_args_passed) {
 426   assert(regs2 == NULL, "not needed on arm");
 427 #ifdef AARCH64
 428   int slot = 0; // counted in 32-bit VMReg slots
 429   int reg = 0;
 430   int fp_reg = 0;
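  // For illustration: a native signature (jint, jlong, jfloat, jdouble) is
  // assigned R0, R1, V0 and V1 here; arguments that do not fit into the
  // 8 integer or 8 FP parameter registers get two 32-bit VMReg stack slots
  // each (slot is counted in 32-bit VMReg stack slots).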
 431   for (int i = 0; i < total_args_passed; i++) {
 432     switch (sig_bt[i]) {
 433     case T_SHORT:
 434     case T_CHAR:
 435     case T_BYTE:
 436     case T_BOOLEAN:
 437     case T_INT:
 438       if (reg < GPR_PARAMS) {
 439         Register r = as_Register(reg);
 440         regs[i].set1(r->as_VMReg());
 441         reg++;
 442       } else {
 443         regs[i].set1(VMRegImpl::stack2reg(slot));
 444         slot+=2;
 445       }
 446       break;
 447     case T_LONG:
 448       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 449       // fall through
 450     case T_ARRAY:
 451     case T_OBJECT:
 452     case T_ADDRESS:
 453       if (reg < GPR_PARAMS) {
 454         Register r = as_Register(reg);
 455         regs[i].set2(r->as_VMReg());
 456         reg++;
 457       } else {
 458         regs[i].set2(VMRegImpl::stack2reg(slot));
 459         slot+=2;
 460       }
 461       break;
 462     case T_FLOAT:
 463       if (fp_reg < FPR_PARAMS) {
 464         FloatRegister r = as_FloatRegister(fp_reg);
 465         regs[i].set1(r->as_VMReg());
 466         fp_reg++;
 467       } else {
 468         regs[i].set1(VMRegImpl::stack2reg(slot));
 469         slot+=2;
 470       }
 471       break;
 472     case T_DOUBLE:
 473       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 474       if (fp_reg < FPR_PARAMS) {
 475         FloatRegister r = as_FloatRegister(fp_reg);
 476         regs[i].set2(r->as_VMReg());
 477         fp_reg++;
 478       } else {
 479         regs[i].set2(VMRegImpl::stack2reg(slot));
 480         slot+=2;
 481       }
 482       break;
 483     case T_VOID:
 484       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 485       regs[i].set_bad();
 486       break;
 487     default:
 488       ShouldNotReachHere();
 489     }
 490   }
 491   return slot;
 492 
 493 #else // AARCH64
 494 
 495   int slot = 0;
 496   int ireg = 0;
 497 #ifdef __ABI_HARD__
 498   int fp_slot = 0;
 499   int single_fpr_slot = 0;
 500 #endif // __ABI_HARD__
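  // For illustration, on a hard-float build a native signature
  // (jint, jlong, jfloat, jdouble) is assigned R0, the R3:R2 pair (with
  // ALIGN_WIDE_ARGUMENTS), S0 and D1; arguments that do not fit in registers
  // are passed in 32-bit stack slots.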
 501   for (int i = 0; i < total_args_passed; i++) {
 502     switch (sig_bt[i]) {
 503     case T_SHORT:
 504     case T_CHAR:
 505     case T_BYTE:
 506     case T_BOOLEAN:
 507     case T_INT:
 508     case T_ARRAY:
 509     case T_OBJECT:
 510     case T_ADDRESS:
 511     case T_METADATA:
 512 #ifndef __ABI_HARD__
 513     case T_FLOAT:
 514 #endif // !__ABI_HARD__
 515       if (ireg < 4) {
 516         Register r = as_Register(ireg);
 517         regs[i].set1(r->as_VMReg());
 518         ireg++;
 519       } else {
 520         regs[i].set1(VMRegImpl::stack2reg(slot));
 521         slot++;
 522       }
 523       break;
 524     case T_LONG:
 525 #ifndef __ABI_HARD__
 526     case T_DOUBLE:
 527 #endif // !__ABI_HARD__
 528       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 529       if (ireg <= 2) {
 530 #if (ALIGN_WIDE_ARGUMENTS == 1)
 531         if(ireg & 1) ireg++;  // Aligned location required
 532 #endif
 533         Register r1 = as_Register(ireg);
 534         Register r2 = as_Register(ireg + 1);
 535         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 536         ireg += 2;
 537 #if (ALIGN_WIDE_ARGUMENTS == 0)
 538       } else if (ireg == 3) {
 539         // uses R3 + one stack slot
 540         Register r = as_Register(ireg);
 541         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 542         ireg += 1;
 543         slot += 1;
 544 #endif
 545       } else {
 546         if (slot & 1) slot++; // Aligned location required
 547         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 548         slot += 2;
 549         ireg = 4;
 550       }
 551       break;
 552     case T_VOID:
 553       regs[i].set_bad();
 554       break;
 555 #ifdef __ABI_HARD__
 556     case T_FLOAT:
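      // Singles are allocated in S-register pairs: the first float of a pair
      // reserves a whole D register (fp_slot advances by 2) and the next float
      // back-fills the odd S register of that pair.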
 557       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 558         if ((single_fpr_slot & 1) == 0) {
 559           single_fpr_slot = fp_slot;
 560           fp_slot += 2;
 561         }
 562         FloatRegister r = as_FloatRegister(single_fpr_slot);
 563         single_fpr_slot++;
 564         regs[i].set1(r->as_VMReg());
 565       } else {
 566         regs[i].set1(VMRegImpl::stack2reg(slot));
 567         slot++;
 568       }
 569       break;
 570     case T_DOUBLE:
 571       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 572       if (fp_slot <= 14) {
 573         FloatRegister r1 = as_FloatRegister(fp_slot);
 574         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 575         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 576         fp_slot += 2;
 577       } else {
 578         if(slot & 1) slot++;
 579         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 580         slot += 2;
 581         single_fpr_slot = 16;
 582       }
 583       break;
 584 #endif // __ABI_HARD__
 585     default:
 586       ShouldNotReachHere();
 587     }
 588   }
 589   return slot;
 590 #endif // AARCH64
 591 }
 592 
 593 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 594                                            VMRegPair *regs,
 595                                            int total_args_passed,
 596                                            int is_outgoing) {
 597 #ifdef AARCH64
 598   // C calling convention on AArch64 is good enough.
 599   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 600 #else
 601 #ifdef __SOFTFP__
 602   // soft float is the same as the C calling convention.
 603   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 604 #endif // __SOFTFP__
 605   (void) is_outgoing;
 606   int slot = 0;
 607   int ireg = 0;
 608   int freg = 0;
 609   int single_fpr = 0;
 610 
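  // For illustration, a Java signature (int, long, float, double) gets
  // R0, the R3:R2 pair, S0 and D1, as in the C convention above, except that
  // S14/S15 stay free when C2 is present and wide arguments are always kept
  // aligned internally.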
 611   for (int i = 0; i < total_args_passed; i++) {
 612     switch (sig_bt[i]) {
 613     case T_SHORT:
 614     case T_CHAR:
 615     case T_BYTE:
 616     case T_BOOLEAN:
 617     case T_INT:
 618     case T_ARRAY:
 619     case T_OBJECT:
 620     case T_ADDRESS:
 621       if (ireg < 4) {
 622         Register r = as_Register(ireg++);
 623         regs[i].set1(r->as_VMReg());
 624       } else {
 625         regs[i].set1(VMRegImpl::stack2reg(slot++));
 626       }
 627       break;
 628     case T_FLOAT:
 629       // C2 utilizes S14/S15 for mem-mem moves
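      // As in c_calling_convention, singles back-fill the odd S register of
      // the most recently reserved D-register pair.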
 630       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 631         if ((single_fpr & 1) == 0) {
 632           single_fpr = freg;
 633           freg += 2;
 634         }
 635         FloatRegister r = as_FloatRegister(single_fpr++);
 636         regs[i].set1(r->as_VMReg());
 637       } else {
 638         regs[i].set1(VMRegImpl::stack2reg(slot++));
 639       }
 640       break;
 641     case T_DOUBLE:
 642       // C2 utilizes S14/S15 for mem-mem moves
 643       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 644         FloatRegister r1 = as_FloatRegister(freg);
 645         FloatRegister r2 = as_FloatRegister(freg + 1);
 646         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 647         freg += 2;
 648       } else {
        // Internally keep the aligned calling convention,
        // ignoring ALIGN_WIDE_ARGUMENTS
 651         if (slot & 1) slot++;
 652         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 653         slot += 2;
 654         single_fpr = 16;
 655       }
 656       break;
 657     case T_LONG:
      // Internally keep the aligned calling convention,
      // ignoring ALIGN_WIDE_ARGUMENTS
 660       if (ireg <= 2) {
 661         if (ireg & 1) ireg++;
 662         Register r1 = as_Register(ireg);
 663         Register r2 = as_Register(ireg + 1);
 664         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 665         ireg += 2;
 666       } else {
 667         if (slot & 1) slot++;
 668         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 669         slot += 2;
 670         ireg = 4;
 671       }
 672       break;
 673     case T_VOID:
 674       regs[i].set_bad();
 675       break;
 676     default:
 677       ShouldNotReachHere();
 678     }
 679   }
 680 
 681   if (slot & 1) slot++;
 682   return slot;
 683 #endif // AARCH64
 684 }
 685 
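// Called from the c2i adapter: if the callee now has compiled code
// (Method::code() != NULL), call into the runtime to patch the caller's
// call site so future calls go straight to the compiled entry. All Java
// argument registers are preserved across the runtime call.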
 686 static void patch_callers_callsite(MacroAssembler *masm) {
 687   Label skip;
 688 
 689   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 690   __ cbz(Rtemp, skip);
 691 
 692 #ifdef AARCH64
 693   push_param_registers(masm, FPR_PARAMS);
 694   __ raw_push(LR, ZR);
 695 #else
  // Push an even number of registers for stack alignment.
  // R9 is selected because it has to be saved anyway on some platforms.
 698   __ push(RegisterSet(R0, R3) | R9 | LR);
 699   __ fpush_hardfp(FloatRegisterSet(D0, 8));
 700 #endif // AARCH64
 701 
 702   __ mov(R0, Rmethod);
 703   __ mov(R1, LR);
 704   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 705 
 706 #ifdef AARCH64
 707   __ raw_pop(LR, ZR);
 708   pop_param_registers(masm, FPR_PARAMS);
 709 #else
 710   __ fpop_hardfp(FloatRegisterSet(D0, 8));
 711   __ pop(RegisterSet(R0, R3) | R9 | LR);
 712 #endif // AARCH64
 713 
 714   __ bind(skip);
 715 }
 716 
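// An i2c adapter reads the arguments laid out by the interpreter (addressed
// via Rparams on AArch64, via the incoming SP on 32-bit ARM), repacks them
// into the compiled calling convention, and jumps to the callee's
// from_compiled entry.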
 717 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 718                                     int total_args_passed, int comp_args_on_stack,
 719                                     const BasicType *sig_bt, const VMRegPair *regs) {
  // TODO: ARM - maybe we can use ldm to load arguments
  const Register tmp = Rtemp; // avoid erasing R5_mh

  // The next assert may not be needed, but it is safer. Extra analysis is
  // required if there are not enough free registers and we need to use R5 here.
 725   assert_different_registers(tmp, R5_mh);
 726 
  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race through here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try to find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
 736   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 737   __ str(Rmethod, callee_target_addr);
 738 
 739 #ifdef AARCH64
 740 
 741   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
 742   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
 743 
 744   if (comp_args_on_stack) {
 745     __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
 746   }
 747 
 748   for (int i = 0; i < total_args_passed; i++) {
 749     if (sig_bt[i] == T_VOID) {
 750       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 751       continue;
 752     }
 753     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 754 
 755     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 756     Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 757 
 758     VMReg r = regs[i].first();
 759     bool full_word = regs[i].second()->is_valid();
 760 
 761     if (r->is_stack()) {
 762       if (full_word) {
 763         __ ldr(tmp, source_addr);
 764         __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 765       } else {
 766         __ ldr_w(tmp, source_addr);
 767         __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 768       }
 769     } else if (r->is_Register()) {
 770       if (full_word) {
 771         __ ldr(r->as_Register(), source_addr);
 772       } else {
 773         __ ldr_w(r->as_Register(), source_addr);
 774       }
 775     } else if (r->is_FloatRegister()) {
 776       if (sig_bt[i] == T_DOUBLE) {
 777         __ ldr_d(r->as_FloatRegister(), source_addr);
 778       } else {
 779         __ ldr_s(r->as_FloatRegister(), source_addr);
 780       }
 781     } else {
 782       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 783     }
 784   }
 785 
 786   __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
 787   __ br(tmp);
 788 
 789 #else
 790 
 791   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 792 
 793   const Register initial_sp = Rmethod; // temporarily scratched
 794 
  // The old code modified R4, but that looks unsafe (particularly with JSR292)
 796   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 797 
 798   __ mov(initial_sp, SP);
 799 
 800   if (comp_args_on_stack) {
 801     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 802   }
 803   __ bic(SP, SP, StackAlignmentInBytes - 1);
 804 
 805   for (int i = 0; i < total_args_passed; i++) {
 806     if (sig_bt[i] == T_VOID) {
 807       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 808       continue;
 809     }
 810     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 811     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 812 
 813     VMReg r_1 = regs[i].first();
 814     VMReg r_2 = regs[i].second();
 815     if (r_1->is_stack()) {
 816       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 817       if (!r_2->is_valid()) {
 818         __ ldr(tmp, Address(initial_sp, arg_offset));
 819         __ str(tmp, Address(SP, stack_offset));
 820       } else {
 821         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 822         __ str(tmp, Address(SP, stack_offset));
 823         __ ldr(tmp, Address(initial_sp, arg_offset));
 824         __ str(tmp, Address(SP, stack_offset + wordSize));
 825       }
 826     } else if (r_1->is_Register()) {
 827       if (!r_2->is_valid()) {
 828         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 829       } else {
 830         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 831         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 832       }
 833     } else if (r_1->is_FloatRegister()) {
 834 #ifdef __SOFTFP__
 835       ShouldNotReachHere();
 836 #endif // __SOFTFP__
 837       if (!r_2->is_valid()) {
 838         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 839       } else {
 840         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 841       }
 842     } else {
 843       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 844     }
 845   }
 846 
 847   // restore Rmethod (scratched for initial_sp)
 848   __ ldr(Rmethod, callee_target_addr);
 849   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 850 
 851 #endif // AARCH64
 852 }
 853 
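// A c2i adapter takes arguments in the compiled calling convention, spills
// them into an interpreter expression-stack layout allocated below SP, and
// jumps to the method's interpreter entry. Rsender_sp is set to the compiled
// caller's SP before the stack is extended.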
 854 static void gen_c2i_adapter(MacroAssembler *masm,
 855                             int total_args_passed,  int comp_args_on_stack,
 856                             const BasicType *sig_bt, const VMRegPair *regs,
 857                             Label& skip_fixup) {
  // TODO: ARM - maybe we can use stm to deoptimize arguments
 859   const Register tmp = Rtemp;
 860 
 861   patch_callers_callsite(masm);
 862   __ bind(skip_fixup);
 863 
 864   __ mov(Rsender_sp, SP); // not yet saved
 865 
 866 #ifdef AARCH64
 867 
 868   int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
 869   if (extraspace) {
 870     __ sub(SP, SP, extraspace);
 871   }
 872 
 873   for (int i = 0; i < total_args_passed; i++) {
 874     if (sig_bt[i] == T_VOID) {
 875       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 876       continue;
 877     }
 878 
 879     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 880     Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 881 
 882     VMReg r = regs[i].first();
 883     bool full_word = regs[i].second()->is_valid();
 884 
 885     if (r->is_stack()) {
 886       if (full_word) {
 887         __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 888         __ str(tmp, dest_addr);
 889       } else {
 890         __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 891         __ str_w(tmp, dest_addr);
 892       }
 893     } else if (r->is_Register()) {
 894       if (full_word) {
 895         __ str(r->as_Register(), dest_addr);
 896       } else {
 897         __ str_w(r->as_Register(), dest_addr);
 898       }
 899     } else if (r->is_FloatRegister()) {
 900       if (sig_bt[i] == T_DOUBLE) {
 901         __ str_d(r->as_FloatRegister(), dest_addr);
 902       } else {
 903         __ str_s(r->as_FloatRegister(), dest_addr);
 904       }
 905     } else {
 906       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 907     }
 908   }
 909 
 910   __ mov(Rparams, SP);
 911 
 912   __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
 913   __ br(tmp);
 914 
 915 #else
 916 
 917   int extraspace = total_args_passed * Interpreter::stackElementSize;
 918   if (extraspace) {
 919     __ sub_slow(SP, SP, extraspace);
 920   }
 921 
 922   for (int i = 0; i < total_args_passed; i++) {
 923     if (sig_bt[i] == T_VOID) {
 924       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 925       continue;
 926     }
 927     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 928 
 929     VMReg r_1 = regs[i].first();
 930     VMReg r_2 = regs[i].second();
 931     if (r_1->is_stack()) {
 932       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 933       if (!r_2->is_valid()) {
 934         __ ldr(tmp, Address(SP, arg_offset));
 935         __ str(tmp, Address(SP, stack_offset));
 936       } else {
 937         __ ldr(tmp, Address(SP, arg_offset));
 938         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 939         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 940         __ str(tmp, Address(SP, stack_offset));
 941       }
 942     } else if (r_1->is_Register()) {
 943       if (!r_2->is_valid()) {
 944         __ str(r_1->as_Register(), Address(SP, stack_offset));
 945       } else {
 946         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 947         __ str(r_2->as_Register(), Address(SP, stack_offset));
 948       }
 949     } else if (r_1->is_FloatRegister()) {
 950 #ifdef __SOFTFP__
 951       ShouldNotReachHere();
 952 #endif // __SOFTFP__
 953       if (!r_2->is_valid()) {
 954         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 955       } else {
 956         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 957       }
 958     } else {
 959       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 960     }
 961   }
 962 
 963   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 964 
 965 #endif // AARCH64
 966 }
 967 
 968 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 969                                                             int total_args_passed,
 970                                                             int comp_args_on_stack,
 971                                                             const BasicType *sig_bt,
 972                                                             const VMRegPair *regs,
 973                                                             AdapterFingerPrint* fingerprint) {
 974   address i2c_entry = __ pc();
 975   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 976 
 977   address c2i_unverified_entry = __ pc();
 978   Label skip_fixup;
 979   const Register receiver       = R0;
 980   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 981   const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
 982 
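  // Inline cache check for the unverified c2i entry: Ricklass holds the
  // CompiledICHolder. On a klass mismatch jump to the ic-miss stub; if the
  // callee has no compiled code, the call-site fixup can be skipped.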
 983   __ load_klass(receiver_klass, receiver);
 984   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 985   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 986   __ cmp(receiver_klass, holder_klass);
 987 
 988 #ifdef AARCH64
 989   Label ic_miss;
 990   __ b(ic_miss, ne);
 991   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 992   __ cbz(Rtemp, skip_fixup);
 993   __ bind(ic_miss);
 994   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 995 #else
 996   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 997   __ cmp(Rtemp, 0, eq);
 998   __ b(skip_fixup, eq);
 999   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
1000 #endif // AARCH64
1001 
1002   address c2i_entry = __ pc();
1003   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1004 
1005   __ flush();
1006   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1007 }
1008 
1009 
1010 static int reg2offset_in(VMReg r) {
1011   // Account for saved FP and LR
1012   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1013 }
1014 
1015 static int reg2offset_out(VMReg r) {
1016   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1017 }
1018 
1019 
1020 static void verify_oop_args(MacroAssembler* masm,
1021                             const methodHandle& method,
1022                             const BasicType* sig_bt,
1023                             const VMRegPair* regs) {
1024   Register temp_reg = Rmethod;  // not part of any compiled calling seq
1025   if (VerifyOops) {
1026     for (int i = 0; i < method->size_of_parameters(); i++) {
1027       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
1028         VMReg r = regs[i].first();
1029         assert(r->is_valid(), "bad oop arg");
1030         if (r->is_stack()) {
1031           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1032           __ verify_oop(temp_reg);
1033         } else {
1034           __ verify_oop(r->as_Register());
1035         }
1036       }
1037     }
1038   }
1039 }
1040 
1041 static void gen_special_dispatch(MacroAssembler* masm,
1042                                  const methodHandle& method,
1043                                  const BasicType* sig_bt,
1044                                  const VMRegPair* regs) {
1045   verify_oop_args(masm, method, sig_bt, regs);
1046   vmIntrinsics::ID iid = method->intrinsic_id();
1047 
1048   // Now write the args into the outgoing interpreter space
1049   bool     has_receiver   = false;
1050   Register receiver_reg   = noreg;
1051   int      member_arg_pos = -1;
1052   Register member_reg     = noreg;
1053   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1054   if (ref_kind != 0) {
1055     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1056     member_reg = Rmethod;  // known to be free at this point
1057     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1058   } else if (iid == vmIntrinsics::_invokeBasic) {
1059     has_receiver = true;
1060   } else {
1061     fatal("unexpected intrinsic id %d", iid);
1062   }
1063 
1064   if (member_reg != noreg) {
1065     // Load the member_arg into register, if necessary.
1066     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1067     VMReg r = regs[member_arg_pos].first();
1068     if (r->is_stack()) {
1069       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1070     } else {
1071       // no data motion is needed
1072       member_reg = r->as_Register();
1073     }
1074   }
1075 
1076   if (has_receiver) {
1077     // Make sure the receiver is loaded into a register.
1078     assert(method->size_of_parameters() > 0, "oob");
1079     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1080     VMReg r = regs[0].first();
1081     assert(r->is_valid(), "bad receiver arg");
1082     if (r->is_stack()) {
1083       // Porting note:  This assumes that compiled calling conventions always
1084       // pass the receiver oop in a register.  If this is not true on some
1085       // platform, pick a temp and load the receiver from stack.
1086       assert(false, "receiver always in a register");
1087       receiver_reg = j_rarg0;  // known to be free at this point
1088       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1089     } else {
1090       // no data motion is needed
1091       receiver_reg = r->as_Register();
1092     }
1093   }
1094 
1095   // Figure out which address we are really jumping to:
1096   MethodHandles::generate_method_handle_dispatch(masm, iid,
1097                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1098 }
1099 
1100 // ---------------------------------------------------------------------------
1101 // Generate a native wrapper for a given method.  The method takes arguments
1102 // in the Java compiled code convention, marshals them to the native
1103 // convention (handlizes oops, etc), transitions to native, makes the call,
1104 // returns to java state (possibly blocking), unhandlizes any result and
1105 // returns.
1106 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1107                                                 const methodHandle& method,
1108                                                 int compile_id,
1109                                                 BasicType* in_sig_bt,
1110                                                 VMRegPair* in_regs,
1111                                                 BasicType ret_type,
1112                                                 address critical_entry) {
1113   if (method->is_method_handle_intrinsic()) {
1114     vmIntrinsics::ID iid = method->intrinsic_id();
1115     intptr_t start = (intptr_t)__ pc();
1116     int vep_offset = ((intptr_t)__ pc()) - start;
1117     gen_special_dispatch(masm,
1118                          method,
1119                          in_sig_bt,
1120                          in_regs);
1121     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1122     __ flush();
1123     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1124     return nmethod::new_native_nmethod(method,
1125                                        compile_id,
1126                                        masm->code(),
1127                                        vep_offset,
1128                                        frame_complete,
1129                                        stack_slots / VMRegImpl::slots_per_word,
1130                                        in_ByteSize(-1),
1131                                        in_ByteSize(-1),
1132                                        (OopMapSet*)NULL);
1133   }
  // Arguments for a JNI method include JNIEnv and, if the method is static, the Class

  // Usage of Rtemp should be OK since it is scratched by the native call
1137 
1138   bool is_static = method->is_static();
1139 
1140   const int total_in_args = method->size_of_parameters();
1141   int total_c_args = total_in_args + 1;
1142   if (is_static) {
1143     total_c_args++;
1144   }
1145 
1146   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1147   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1148 
1149   int argc = 0;
1150   out_sig_bt[argc++] = T_ADDRESS;
1151   if (is_static) {
1152     out_sig_bt[argc++] = T_OBJECT;
1153   }
1154 
1155   int i;
1156   for (i = 0; i < total_in_args; i++) {
1157     out_sig_bt[argc++] = in_sig_bt[i];
1158   }
1159 
1160   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1161   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1162   // Since object arguments need to be wrapped, we must preserve space
1163   // for those object arguments which come in registers (GPR_PARAMS maximum)
1164   // plus one more slot for Klass handle (for static methods)
1165   int oop_handle_offset = stack_slots;
1166   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
1167 
1168   // Plus a lock if needed
1169   int lock_slot_offset = 0;
1170   if (method->is_synchronized()) {
1171     lock_slot_offset = stack_slots;
1172     assert(sizeof(BasicLock) == wordSize, "adjust this code");
1173     stack_slots += VMRegImpl::slots_per_word;
1174   }
1175 
1176   // Space to save return address and FP
1177   stack_slots += 2 * VMRegImpl::slots_per_word;
1178 
1179   // Calculate the final stack size taking account of alignment
1180   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
1181   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1182   int lock_slot_fp_offset = stack_size - 2 * wordSize -
1183     lock_slot_offset * VMRegImpl::stack_slot_size;
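  // Resulting wrapper frame layout (roughly, from high to low addresses):
  //   saved FP and LR
  //   lock slot (synchronized methods only)
  //   oop handle area (GPR_PARAMS + 1 slots)
  //   outgoing C argument area                  <-- SP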
1184 
1185   // Unverified entry point
1186   address start = __ pc();
1187 
1188   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1189   const Register receiver = R0; // see receiverOpr()
1190   __ load_klass(Rtemp, receiver);
1191   __ cmp(Rtemp, Ricklass);
1192   Label verified;
1193 
1194   __ b(verified, eq); // jump over alignment no-ops too
1195   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1196   __ align(CodeEntryAlignment);
1197 
1198   // Verified entry point
1199   __ bind(verified);
1200   int vep_offset = __ pc() - start;
1201 
1202 #ifdef AARCH64
1203   // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1204   __ nop();
1205 #endif // AARCH64
1206 
1207   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
    // Object.hashCode and System.identityHashCode can pull the hashCode from the
    // header word instead of doing a full VM transition once it has been computed.
1210     Label slow_case;
1211     const Register obj_reg = R0;
1212 
    // Unlike Object.hashCode, System.identityHashCode is a static method and
    // gets the object as an argument instead of as the receiver.
1215     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1216       assert(method->is_static(), "method should be static");
      // Return 0 for a null reference input: return value = R0 = obj_reg = 0
1218 #ifdef AARCH64
1219       Label Continue;
1220       __ cbnz(obj_reg, Continue);
1221       __ ret();
1222       __ bind(Continue);
1223 #else
1224       __ cmp(obj_reg, 0);
1225       __ bx(LR, eq);
1226 #endif
1227     }
1228 
1229     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1230 
1231     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1232     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1233 
1234     if (UseBiasedLocking) {
1235       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1236       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1237     }
1238 
1239 #ifdef AARCH64
1240     __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1241     __ b(slow_case, eq);
1242     __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1243     __ ret();
1244 #else
1245     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1246     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1247     __ bx(LR, ne);
1248 #endif // AARCH64
1249 
1250     __ bind(slow_case);
1251   }
1252 
1253   // Bang stack pages
1254   __ arm_stack_overflow_check(stack_size, Rtemp);
1255 
1256   // Setup frame linkage
1257   __ raw_push(FP, LR);
1258   __ mov(FP, SP);
1259   __ sub_slow(SP, SP, stack_size - 2*wordSize);
1260 
1261   int frame_complete = __ pc() - start;
1262 
1263   OopMapSet* oop_maps = new OopMapSet();
1264   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1265   const int extra_args = is_static ? 2 : 1;
1266   int receiver_offset = -1;
1267   int fp_regs_in_arguments = 0;
1268 
1269   for (i = total_in_args; --i >= 0; ) {
1270     switch (in_sig_bt[i]) {
1271     case T_ARRAY:
1272     case T_OBJECT: {
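      // Handlize the oop: pass the address of the stack slot holding it
      // (or NULL if the oop itself is NULL) and record that slot in the
      // oop map.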
1273       VMReg src = in_regs[i].first();
1274       VMReg dst = out_regs[i + extra_args].first();
1275       if (src->is_stack()) {
1276         assert(dst->is_stack(), "must be");
1277         assert(i != 0, "Incoming receiver is always in a register");
1278         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1279         __ cmp(Rtemp, 0);
1280 #ifdef AARCH64
1281         __ add(Rtemp, FP, reg2offset_in(src));
1282         __ csel(Rtemp, ZR, Rtemp, eq);
1283 #else
1284         __ add(Rtemp, FP, reg2offset_in(src), ne);
1285 #endif // AARCH64
1286         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1287         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1288         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1289       } else {
1290         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1291         __ str(src->as_Register(), Address(SP, offset));
1292         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1293         if ((i == 0) && (!is_static)) {
1294           receiver_offset = offset;
1295         }
1296         oop_handle_offset += VMRegImpl::slots_per_word;
1297 
1298 #ifdef AARCH64
1299         __ cmp(src->as_Register(), 0);
1300         __ add(Rtemp, SP, offset);
1301         __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1302         if (dst->is_stack()) {
1303           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1304         }
1305 #else
1306         if (dst->is_stack()) {
1307           __ movs(Rtemp, src->as_Register());
1308           __ add(Rtemp, SP, offset, ne);
1309           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1310         } else {
1311           __ movs(dst->as_Register(), src->as_Register());
1312           __ add(dst->as_Register(), SP, offset, ne);
1313         }
1314 #endif // AARCH64
1315       }
1316     }
1317 
1318     case T_VOID:
1319       break;
1320 
1321 #ifdef AARCH64
1322     case T_FLOAT:
1323     case T_DOUBLE: {
1324       VMReg src = in_regs[i].first();
1325       VMReg dst = out_regs[i + extra_args].first();
1326       if (src->is_stack()) {
1327         assert(dst->is_stack(), "must be");
1328         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1329         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1330       } else {
1331         assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1332         assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1333         fp_regs_in_arguments++;
1334       }
1335       break;
1336     }
1337 #else // AARCH64
1338 
1339 #ifdef __SOFTFP__
1340     case T_DOUBLE:
1341 #endif
1342     case T_LONG: {
1343       VMReg src_1 = in_regs[i].first();
1344       VMReg src_2 = in_regs[i].second();
1345       VMReg dst_1 = out_regs[i + extra_args].first();
1346       VMReg dst_2 = out_regs[i + extra_args].second();
1347 #if (ALIGN_WIDE_ARGUMENTS == 0)
      // The C convention can mix a register and a stack slot for a
      // 64-bit native argument.

      // Note: the following code should work independently of whether
      // the Java calling convention follows the C convention or whether
      // it aligns 64-bit values.
1354       if (dst_2->is_Register()) {
1355         if (src_1->as_Register() != dst_1->as_Register()) {
1356           assert(src_1->as_Register() != dst_2->as_Register() &&
1357                  src_2->as_Register() != dst_2->as_Register(), "must be");
1358           __ mov(dst_2->as_Register(), src_2->as_Register());
1359           __ mov(dst_1->as_Register(), src_1->as_Register());
1360         } else {
1361           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1362         }
1363       } else if (src_2->is_Register()) {
1364         if (dst_1->is_Register()) {
1365           // dst mixes a register and a stack slot
1366           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1367           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
1368           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1369           __ mov(dst_1->as_Register(), src_1->as_Register());
1370         } else {
1371           // registers to stack slots
1372           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1373           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1374           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1375         }
1376       } else if (src_1->is_Register()) {
1377         if (dst_1->is_Register()) {
1378           // src and dst must be R3 + stack slot
1379           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
1380           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
1381           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
1382         } else {
1383           // <R3,stack> -> <stack,stack>
1384           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
1385           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1386           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1387           __ str(LR, Address(SP, reg2offset_out(dst_2)));
1388         }
1389       } else {
1390         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1391         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1392         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1393         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1394         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1395       }
1396 #else // ALIGN_WIDE_ARGUMENTS
1397       if (src_1->is_stack()) {
1398         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1399         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1400         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1401         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1402         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1403       } else if (dst_1->is_stack()) {
1404         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1405         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1406         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1407       } else if (src_1->as_Register() == dst_1->as_Register()) {
1408         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1409       } else {
1410         assert(src_1->as_Register() != dst_2->as_Register() &&
1411                src_2->as_Register() != dst_2->as_Register(), "must be");
1412         __ mov(dst_2->as_Register(), src_2->as_Register());
1413         __ mov(dst_1->as_Register(), src_1->as_Register());
1414       }
1415 #endif // ALIGN_WIDE_ARGUMENTS
1416       break;
1417     }
1418 
1419 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1420     case T_FLOAT: {
1421       VMReg src = in_regs[i].first();
1422       VMReg dst = out_regs[i + extra_args].first();
1423       if (src->is_stack()) {
1424         assert(dst->is_stack(), "must be");
1425         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1426         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1427       } else if (dst->is_stack()) {
1428         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1429       } else {
1430         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1431         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1432       }
1433       break;
1434     }
1435 
1436     case T_DOUBLE: {
1437       VMReg src_1 = in_regs[i].first();
1438       VMReg src_2 = in_regs[i].second();
1439       VMReg dst_1 = out_regs[i + extra_args].first();
1440       VMReg dst_2 = out_regs[i + extra_args].second();
1441       if (src_1->is_stack()) {
1442         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1443         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1444         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1445         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1446         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1447       } else if (dst_1->is_stack()) {
1448         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1449         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1450 #if (ALIGN_WIDE_ARGUMENTS == 0)
1451       } else if (dst_2->is_stack()) {
1452         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1453         // double register must go into R3 + one stack slot
1454         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1455         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1456 #endif
1457       } else {
1458         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1459         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1460       }
1461       break;
1462     }
1463 #endif // !__SOFTFP__ && !__ABI_HARD__
1464 
1465 #ifdef __ABI_HARD__
1466     case T_FLOAT: {
1467       VMReg src = in_regs[i].first();
1468       VMReg dst = out_regs[i + extra_args].first();
1469       if (src->is_stack()) {
1470         if (dst->is_stack()) {
1471           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1472           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1473         } else {
1474           // C2 Java calling convention does not populate S14 and S15, therefore
1475           // those need to be loaded from stack here
1476           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1477           fp_regs_in_arguments++;
1478         }
1479       } else {
1480         assert(src->is_FloatRegister(), "must be");
1481         fp_regs_in_arguments++;
1482       }
1483       break;
1484     }
1485     case T_DOUBLE: {
1486       VMReg src_1 = in_regs[i].first();
1487       VMReg src_2 = in_regs[i].second();
1488       VMReg dst_1 = out_regs[i + extra_args].first();
1489       VMReg dst_2 = out_regs[i + extra_args].second();
1490       if (src_1->is_stack()) {
1491         if (dst_1->is_stack()) {
1492           assert(dst_2->is_stack(), "must be");
1493           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1494           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1495           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1496           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1497         } else {
1498           // C2 Java calling convention does not populate S14 and S15, therefore
1499           // those need to be loaded from stack here
1500           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1501           fp_regs_in_arguments += 2;
1502         }
1503       } else {
1504         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1505         fp_regs_in_arguments += 2;
1506       }
1507       break;
1508     }
1509 #endif // __ABI_HARD__
1510 #endif // AARCH64
1511 
1512     default: {
1513       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1514       VMReg src = in_regs[i].first();
1515       VMReg dst = out_regs[i + extra_args].first();
1516       if (src->is_stack()) {
1517         assert(dst->is_stack(), "must be");
1518         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1519         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1520       } else if (dst->is_stack()) {
1521         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1522       } else {
1523         assert(src->is_Register() && dst->is_Register(), "must be");
1524         __ mov(dst->as_Register(), src->as_Register());
1525       }
1526     }
1527     }
1528   }
1529 
1530   // For a static method, store the class mirror into the stack and pass the address of that slot as the hidden class argument
1531   int klass_offset = -1;
1532   if (is_static) {
1533     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1534     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1535     __ add(c_rarg1, SP, klass_offset);
1536     __ str(Rtemp, Address(SP, klass_offset));
1537     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1538   }
1539 
1540   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1541   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1542   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1543   oop_maps->add_gc_map(pc_offset, map);
1544 
1545 #ifndef AARCH64
1546   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1547   __ membar(MacroAssembler::StoreStore, Rtemp);
1548 #endif // !AARCH64
1549 
1550   // RedefineClasses() tracing support for obsolete method entry
1551   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1552 #ifdef AARCH64
1553     __ NOT_TESTED();
1554 #endif
1555     __ save_caller_save_registers();
1556     __ mov(R0, Rthread);
1557     __ mov_metadata(R1, method());
1558     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1559     __ restore_caller_save_registers();
1560   }
1561 
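       // Registers used by the locking code below and by the unlocking code after
       // the native call (synchronized methods only).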
1562   const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1563   const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1564   const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1565   const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
1566 
1567   Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
1568   if (method->is_synchronized()) {
1569     // The first argument is a handle to the sync object (a class or an instance)
1570     __ ldr(sync_obj, Address(R1));
1571     // Remember the handle for the unlocking code
1572     __ mov(sync_handle, R1);
1573 
1574     if (UseBiasedLocking) {
1575       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1576     }
1577 
1578     const Register mark = tmp;
1579 #ifdef AARCH64
1580     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1581     assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1582 
1583     __ ldr(mark, sync_obj);
1584 
1585     // Test if object is already locked
1586     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1587     __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1588 
1589     // Check for recursive lock
1590     // See comments in InterpreterMacroAssembler::lock_object for
1591     // explanations on the fast recursive locking check.
1592     __ mov(Rtemp, SP);
1593     __ sub(Rtemp, mark, Rtemp);
1594     intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1595     Assembler::LogicalImmediate imm(mask, false);
1596     __ ands(Rtemp, Rtemp, imm);
1597     __ b(slow_lock, ne);
1598 
1599     // Recursive locking: store 0 into a lock record
1600     __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1601     __ b(lock_done);
1602 
1603     __ bind(fast_lock);
1604     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1605 
1606     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1607 #else
1608     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1609     // That is acceptable: either the CAS or the slow-case path is taken in that case.
1610 
1611     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1612     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1613     __ tst(mark, markOopDesc::unlocked_value);
1614     __ b(fast_lock, ne);
1615 
1616     // Check for recursive lock
1617     // See comments in InterpreterMacroAssembler::lock_object for
1618     // explanations on the fast recursive locking check.
1619     // Check independently the low bits and the distance to SP
1620     // -1- test low 2 bits
1621     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1622     // -2- test (hdr - SP) if the low two bits are 0
1623     __ sub(Rtemp, mark, SP, eq);
1624     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1625     // If still 'eq' then recursive locking OK: set displaced header to 0
1626     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1627     __ b(lock_done, eq);
1628     __ b(slow_lock);
1629 
1630     __ bind(fast_lock);
1631     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1632 
1633     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1634 #endif // AARCH64
1635 
1636     __ bind(lock_done);
1637   }
1638 
1639   // Get JNIEnv*
1640   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1641 
1642   // Perform thread state transition
1643   __ mov(Rtemp, _thread_in_native);
1644 #ifdef AARCH64
1645   // stlr instruction is used to force all preceding writes to be observed prior to thread state change
1646   __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1647   __ stlr_w(Rtemp, Rtemp2);
1648 #else
1649   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1650 #endif // AARCH64
1651 
1652   // Finally, call the native method
1653   __ call(method->native_function());
1654 
1655   // Set FPSCR/FPCR to a known state
1656   if (AlwaysRestoreFPU) {
1657     __ restore_default_fp_mode();
1658   }
1659 
1660   // Ensure a Boolean result is mapped to 0..1
1661   if (ret_type == T_BOOLEAN) {
1662     __ c2bool(R0);
1663   }
1664 
1665   // Do a safepoint check while thread is in transition state
1666   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1667   Label call_safepoint_runtime, return_to_java;
1668   __ mov(Rtemp, _thread_in_native_trans);
1669   __ ldr_literal(R2, safepoint_state);
1670   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1671 
1672   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1673   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1674 
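       // Call into the runtime if a safepoint is in progress or this thread has a
       // pending suspend request.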
1675   __ ldr_s32(R2, Address(R2));
1676   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1677   __ cmp(R2, SafepointSynchronize::_not_synchronized);
1678   __ cond_cmp(R3, 0, eq);
1679   __ b(call_safepoint_runtime, ne);
1680   __ bind(return_to_java);
1681 
1682   // Perform thread state transition and reguard stack yellow pages if needed
1683   Label reguard, reguard_done;
1684   __ mov(Rtemp, _thread_in_Java);
1685   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1686   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1687 
1688   __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1689   __ b(reguard, eq);
1690   __ bind(reguard_done);
1691 
1692   Label slow_unlock, unlock_done, retry;
1693   if (method->is_synchronized()) {
1694     __ ldr(sync_obj, Address(sync_handle));
1695 
1696     if (UseBiasedLocking) {
1697       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1698       // disp_hdr may not have been saved on entry with biased locking
1699       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1700     }
1701 
1702     // See C1_MacroAssembler::unlock_object() for more comments
1703     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1704     __ cbz(R2, unlock_done);
1705 
1706     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1707 
1708     __ bind(unlock_done);
1709   }
1710 
1711   // Reset the last Java frame and clear the top of the active JNI handle block
1712   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1713   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1714 
1715 #ifdef AARCH64
1716   __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1717   if (CheckJNICalls) {
1718     __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1719   }
1720 
1721 
1722   switch (ret_type) {
1723   case T_BOOLEAN:
1724     __ tst(R0, 0xff);
1725     __ cset(R0, ne);
1726     break;
1727   case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1728   case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1729   case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1730   case T_INT    : // fall through
1731   case T_LONG   : // fall through
1732   case T_VOID   : // fall through
1733   case T_FLOAT  : // fall through
1734   case T_DOUBLE : /* nothing to do */          break;
1735   case T_OBJECT : // fall through
1736   case T_ARRAY  : break; // See JNIHandles::resolve below
1737   default:
1738     ShouldNotReachHere();
1739   }
1740 #else
1741   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1742   if (CheckJNICalls) {
1743     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1744   }
1745 #endif // AARCH64
1746 
1747   // Unbox the oop result, i.e. JNIHandles::resolve the value in R0.
1748   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1749     __ resolve_jobject(R0,      // value
1750                        Rtemp,   // tmp1
1751                        R1_tmp); // tmp2
1752   }
1753 
1754   // Any exception pending?
1755   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1756   __ mov(SP, FP);
1757 
1758 #ifdef AARCH64
1759   Label except;
1760   __ cbnz(Rtemp, except);
1761   __ raw_pop(FP, LR);
1762   __ ret();
1763 
1764   __ bind(except);
1765   // Pop the frame and forward the exception. Rexception_pc contains return address.
1766   __ raw_pop(FP, Rexception_pc);
1767 #else
1768   __ cmp(Rtemp, 0);
1769   // Pop the frame and return if no exception pending
1770   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1771   // Pop the frame and forward the exception. Rexception_pc contains return address.
1772   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1773   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1774 #endif // AARCH64
1775   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1776 
1777   // Safepoint operation and/or pending suspend request is in progress.
1778   // Save the return values and call the runtime function by hand.
1779   __ bind(call_safepoint_runtime);
1780   push_result_registers(masm, ret_type);
1781   __ mov(R0, Rthread);
1782   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1783   pop_result_registers(masm, ret_type);
1784   __ b(return_to_java);
1785 
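       // Emit the literal pool entry holding the SafepointSynchronize state address
       // loaded with ldr_literal above.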
1786   __ bind_literal(safepoint_state);
1787 
1788   // Reguard stack pages. Save native results around a call to C runtime.
1789   __ bind(reguard);
1790   push_result_registers(masm, ret_type);
1791   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1792   pop_result_registers(masm, ret_type);
1793   __ b(reguard_done);
1794 
1795   if (method->is_synchronized()) {
1796     // Locking slow case
1797     if (UseBiasedLocking) {
1798       __ bind(slow_lock_biased);
1799       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1800     }
1801 
1802     __ bind(slow_lock);
1803 
1804     push_param_registers(masm, fp_regs_in_arguments);
1805 
1806     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1807     __ mov(R0, sync_obj);
1808     __ mov(R1, disp_hdr);
1809     __ mov(R2, Rthread);
1810     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1811 
1812     pop_param_registers(masm, fp_regs_in_arguments);
1813 
1814     __ b(lock_done);
1815 
1816     // Unlocking slow case
1817     __ bind(slow_unlock);
1818 
1819     push_result_registers(masm, ret_type);
1820 
1821     // Clear pending exception before reentering VM.
1822     // Can store the oop in register since it is a leaf call.
1823     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1824     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1825     Register zero = __ zero_register(Rtemp);
1826     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1827     __ mov(R0, sync_obj);
1828     __ mov(R1, disp_hdr);
1829     __ mov(R2, Rthread);
1830     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1831     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1832 
1833     pop_result_registers(masm, ret_type);
1834 
1835     __ b(unlock_done);
1836   }
1837 
1838   __ flush();
1839   return nmethod::new_native_nmethod(method,
1840                                      compile_id,
1841                                      masm->code(),
1842                                      vep_offset,
1843                                      frame_complete,
1844                                      stack_slots / VMRegImpl::slots_per_word,
1845                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1846                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1847                                      oop_maps);
1848 }
1849 
1850 // This function returns the size adjustment (in number of words) to a c2i adapter
1851 // activation for use during deoptimization.
1852 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1853   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
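       // For example, callee_locals = 5 and callee_parameters = 2 with one word per
       // stack element give an adjustment of 3 words (rounded up to the stack
       // alignment on AArch64).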
1854 #ifdef AARCH64
1855   extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1856 #endif // AARCH64
1857   return extra_locals_size;
1858 }
1859 
1860 
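     // No stack slots need to be preserved for outgoing arguments on ARM.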
1861 uint SharedRuntime::out_preserve_stack_slots() {
1862   return 0;
1863 }
1864 
1865 
1866 //------------------------------generate_deopt_blob----------------------------
1867 void SharedRuntime::generate_deopt_blob() {
1868   ResourceMark rm;
1869 #ifdef AARCH64
1870   CodeBuffer buffer("deopt_blob", 1024+256, 1);
1871 #else
1872   CodeBuffer buffer("deopt_blob", 1024, 1024);
1873 #endif
1874   int frame_size_in_words;
1875   OopMapSet* oop_maps;
1876   int reexecute_offset;
1877   int exception_in_tls_offset;
1878   int exception_offset;
1879 
1880   MacroAssembler* masm = new MacroAssembler(&buffer);
1881   Label cont;
1882   const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1883   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1884   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1885   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1886 
1887   address start = __ pc();
1888 
1889   oop_maps = new OopMapSet();
1890   // LR saved by caller (can be live in c2 method)
1891 
1892   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1893   // not possible to call the deopt blob from the nmethod and pass the
1894   // address of the deopt handler of the nmethod in LR. What happens
1895   // now is that the caller of the deopt blob pushes the current
1896   // address so the deopt blob doesn't have to do it. This way LR can
1897   // be preserved, contains the live value from the nmethod and is
1898   // saved at R14/R30_offset here.
1899   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1900   __ mov(Rkind, Deoptimization::Unpack_deopt);
1901   __ b(cont);
1902 
1903   exception_offset = __ pc() - start;
1904 
1905   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1906   // exception_in_tls_offset entry point.
1907   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1908   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1909   // Force return value to NULL to avoid confusing the escape analysis
1910   // logic. Everything is dead here anyway.
1911   __ mov(R0, 0);
1912 
1913   exception_in_tls_offset = __ pc() - start;
1914 
1915   // Exception data is in JavaThread structure
1916   // Patch the return address of the current frame
1917   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1918   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1919   {
1920     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1921     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1922   }
1923   __ mov(Rkind, Deoptimization::Unpack_exception);
1924   __ b(cont);
1925 
1926   reexecute_offset = __ pc() - start;
1927 
1928   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1929   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1930 
1931   // Calculate UnrollBlock and save the result in Rublock
1932   __ bind(cont);
1933   __ mov(R0, Rthread);
1934   __ mov(R1, Rkind);
1935 
1936   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1937   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1938   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1939   if (pc_offset == -1) {
1940     pc_offset = __ offset();
1941   }
1942   oop_maps->add_gc_map(pc_offset, map);
1943   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1944 
1945   __ mov(Rublock, R0);
1946 
1947   // Reload Rkind from the UnrollBlock (might have changed)
1948   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1949   Label noException;
1950   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1951   __ b(noException, ne);
1952   // handle exception case
1953 #ifdef ASSERT
1954   // assert that exception_pc is zero in tls
1955   { Label L;
1956     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1957     __ cbz(Rexception_pc, L);
1958     __ stop("exception pc should be null");
1959     __ bind(L);
1960   }
1961 #endif
1962   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1963   __ verify_oop(Rexception_obj);
1964   {
1965     const Register Rzero = __ zero_register(Rtemp);
1966     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1967   }
1968 
1969   __ bind(noException);
1970 
1971   // This frame is going away.  Fetch return value, so we can move it to
1972   // a new frame.
1973   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1974 #ifndef AARCH64
1975   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1976 #endif // !AARCH64
1977 #ifndef __SOFTFP__
1978   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1979 #endif
1980   // pop frame
1981   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1982 
1983   // Set initial stack state before pushing interpreter frames
1984   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1985   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1986   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
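       // Rtemp = size of the deoptimized frame (in bytes), R2 = array of frame PCs,
       // R3 = array of frame sizes.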
1987 
1988 #ifdef AARCH64
1989   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1990   // They are needed for correct stack walking during stack overflow handling.
1991   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1992   __ sub(Rtemp, Rtemp, 2*wordSize);
1993   __ add(SP, SP, Rtemp, ex_uxtx);
1994   __ raw_pop(FP, LR);
1995 
1996 #ifdef ASSERT
1997   { Label L;
1998     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1999     __ cmp(FP, Rtemp);
2000     __ b(L, eq);
2001     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2002     __ bind(L);
2003   }
2004   { Label L;
2005     __ ldr(Rtemp, Address(R2));
2006     __ cmp(LR, Rtemp);
2007     __ b(L, eq);
2008     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2009     __ bind(L);
2010   }
2011 #endif // ASSERT
2012 
2013 #else
2014   __ add(SP, SP, Rtemp);
2015 #endif // AARCH64
2016 
2017 #ifdef ASSERT
2018   // Compilers generate code that bangs the stack by as much as the
2019   // interpreter would need. So this stack banging should never
2020   // trigger a fault. Verify that it does not on non product builds.
2021   // See if there is enough stack to push deoptimized frames
2022   if (UseStackBanging) {
2023 #ifndef AARCH64
2024     // The compiled method that we are deoptimizing was popped from the stack.
2025     // If the stack bang results in a stack overflow, we don't return to the
2026     // method that is being deoptimized. The stack overflow exception is
2027     // propagated to the caller of the deoptimized method. Need to get the pc
2028     // from the caller in LR and restore FP.
2029     __ ldr(LR, Address(R2, 0));
2030     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2031 #endif // !AARCH64
2032     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2033     __ arm_stack_overflow_check(R8, Rtemp);
2034   }
2035 #endif
2036   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2037 
2038 #ifndef AARCH64
2039   // Pick up the initial fp we should save
2040   // XXX Note: was ldr(FP, Address(FP));
2041 
2042   // The compiler no longer uses FP as a frame pointer for the
2043   // compiled code. It can be used by the allocator in C2 or to
2044   // remember the original SP for JSR292 call sites.
2045 
2046   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
2047   // Deoptimization::fetch_unroll_info computes the right FP value and
2048   // stores it in Rublock.initial_info. This has been activated for ARM.
2049   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2050 #endif // !AARCH64
2051 
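       // Remember the sender SP in Rsender and extend the stack by caller_adjustment
       // bytes before the interpreter frames are pushed.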
2052   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2053   __ mov(Rsender, SP);
2054 #ifdef AARCH64
2055   __ sub(SP, SP, Rtemp, ex_uxtx);
2056 #else
2057   __ sub(SP, SP, Rtemp);
2058 #endif // AARCH64
2059 
2060   // Push interpreter frames in a loop
2061   Label loop;
2062   __ bind(loop);
2063   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2064   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2065 
2066   __ raw_push(FP, LR);                                     // create new frame
2067   __ mov(FP, SP);
2068   __ sub(Rtemp, Rtemp, 2*wordSize);
2069 
2070 #ifdef AARCH64
2071   __ sub(SP, SP, Rtemp, ex_uxtx);
2072 #else
2073   __ sub(SP, SP, Rtemp);
2074 #endif // AARCH64
2075 
2076   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2077 #ifdef AARCH64
2078   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2079 #else
2080   __ mov(LR, 0);
2081   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2082 #endif // AARCH64
2083 
2084   __ subs(R8, R8, 1);                               // decrement counter
2085   __ mov(Rsender, SP);
2086   __ b(loop, ne);
2087 
2088   // Re-push self-frame
2089   __ ldr(LR, Address(R2));
2090   __ raw_push(FP, LR);
2091   __ mov(FP, SP);
2092   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
2093 
2094   // Restore frame locals after moving the frame
2095   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2096 #ifndef AARCH64
2097   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2098 #endif // !AARCH64
2099 
2100 #ifndef __SOFTFP__
2101   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2102 #endif // !__SOFTFP__
2103 
2104 #ifndef AARCH64
2105 #ifdef ASSERT
2106   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2107   { Label L;
2108     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2109     __ cmp_32(Rkind, Rtemp);
2110     __ b(L, eq);
2111     __ stop("Rkind was overwritten");
2112     __ bind(L);
2113   }
2114 #endif
2115 #endif
2116 
2117   // Call unpack_frames with proper arguments
2118   __ mov(R0, Rthread);
2119   __ mov(R1, Rkind);
2120 
2121   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2122   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2123   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2124   if (pc_offset == -1) {
2125     pc_offset = __ offset();
2126   }
2127   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2128   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2129 
2130   // Collect return values, pop self-frame and jump to interpreter
2131   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2132 #ifndef AARCH64
2133   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2134 #endif // !AARCH64
2135   // Interpreter floats controlled by __SOFTFP__, but compiler
2136   // float return value registers controlled by __ABI_HARD__
2137   // This matters for vfp-sflt builds.
2138 #ifndef __SOFTFP__
2139   // Interpreter hard float
2140 #ifdef __ABI_HARD__
2141   // Compiler float return value in FP registers
2142   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2143 #else
2144   // Compiler float return value in integer registers,
2145   // copy to D0 for the interpreter (D0 <-- R1:R0)
2146   __ fmdrr(D0_tos, R0, R1);
2147 #endif
2148 #endif // !__SOFTFP__
2149   __ mov(SP, FP);
2150 
2151 #ifdef AARCH64
2152   __ raw_pop(FP, LR);
2153   __ ret();
2154 #else
2155   __ pop(RegisterSet(FP) | RegisterSet(PC));
2156 #endif // AARCH64
2157 
2158   __ flush();
2159 
2160   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2161                                            reexecute_offset, frame_size_in_words);
2162   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2163 }
2164 
2165 #ifdef COMPILER2
2166 
2167 //------------------------------generate_uncommon_trap_blob--------------------
2168 // Ought to generate an ideal graph & compile, but here's some hand-written ARM
2169 // assembly instead.
2170 void SharedRuntime::generate_uncommon_trap_blob() {
2171   // allocate space for the code
2172   ResourceMark rm;
2173 
2174   // setup code generation tools
2175   int pad = VerifyThread ? 512 : 0;
2176 #ifdef _LP64
2177   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2178 #else
2179   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2180   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2181   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2182 #endif
2183   // bypassed when code generation is useless
2184   MacroAssembler* masm               = new MacroAssembler(&buffer);
2185   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2186   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2187   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2188 
2189   //
2190   // This is the entry point for all traps the compiler takes when it thinks
2191   // it cannot handle further execution of compiled code. The frame is
2192   // deoptimized in these cases and converted into interpreter frames for
2193   // execution.
2194   // The steps taken by this blob are as follows:
2195   //   - push a fake "unpack_frame"
2196   //   - call the C routine Deoptimization::uncommon_trap (this function
2197   //     packs the current compiled frame into vframe arrays and returns
2198   //     information about the number and size of interpreter frames which
2199   //     are equivalent to the frame which is being deoptimized)
2200   //   - deallocate the "unpack_frame"
2201   //   - deallocate the deoptimization frame
2202   //   - in a loop using the information returned in the previous step
2203   //     push interpreter frames;
2204   //   - create a dummy "unpack_frame"
2205   //   - call the C routine: Deoptimization::unpack_frames (this function
2206   //     lays out values on the interpreter frame which was just created)
2207   //   - deallocate the dummy unpack_frame
2208   //   - return to the interpreter entry point
2209   //
2210   //  Refer to the following methods for more information:
2211   //   - Deoptimization::uncommon_trap
2212   //   - Deoptimization::unpack_frames
2213 
2214   // the unloaded class index is in R0 (first parameter to this blob)
2215 
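       // Push a minimal unpack_frame (just FP/LR), then call Deoptimization::uncommon_trap,
       // which builds the UnrollBlock and returns it in R0.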
2216   __ raw_push(FP, LR);
2217   __ set_last_Java_frame(SP, FP, false, Rtemp);
2218   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
2219   __ mov(R1, R0);
2220   __ mov(R0, Rthread);
2221   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
2222   __ mov(Rublock, R0);
2223   __ reset_last_Java_frame(Rtemp);
2224   __ raw_pop(FP, LR);
2225 
2226 #ifdef ASSERT
2227   { Label L;
2228     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2229     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2230     __ b(L, eq);
2231     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2232     __ bind(L);
2233   }
2234 #endif
2235 
2236 
2237   // Set initial stack state before pushing interpreter frames
2238   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2239   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2240   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2241 
2242 #ifdef AARCH64
2243   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2244   // They are needed for correct stack walking during stack overflow handling.
2245   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2246   __ sub(Rtemp, Rtemp, 2*wordSize);
2247   __ add(SP, SP, Rtemp, ex_uxtx);
2248   __ raw_pop(FP, LR);
2249 
2250 #ifdef ASSERT
2251   { Label L;
2252     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2253     __ cmp(FP, Rtemp);
2254     __ b(L, eq);
2255     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2256     __ bind(L);
2257   }
2258   { Label L;
2259     __ ldr(Rtemp, Address(R2));
2260     __ cmp(LR, Rtemp);
2261     __ b(L, eq);
2262     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2263     __ bind(L);
2264   }
2265 #endif // ASSERT
2266 
2267 #else
2268   __ add(SP, SP, Rtemp);
2269 #endif // AARCH64
2270 
2271   // See if there is enough stack to push deoptimized frames
2272 #ifdef ASSERT
2273   // Compilers generate code that bangs the stack by as much as the
2274   // interpreter would need. So this stack banging should never
2275   // trigger a fault. Verify that it does not on non product builds.
2276   if (UseStackBanging) {
2277 #ifndef AARCH64
2278     // The compiled method that we are deoptimizing was popped from the stack.
2279     // If the stack bang results in a stack overflow, we don't return to the
2280     // method that is being deoptimized. The stack overflow exception is
2281     // propagated to the caller of the deoptimized method. Need to get the pc
2282     // from the caller in LR and restore FP.
2283     __ ldr(LR, Address(R2, 0));
2284     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2285 #endif // !AARCH64
2286     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2287     __ arm_stack_overflow_check(R8, Rtemp);
2288   }
2289 #endif
2290   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2291   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2292   __ mov(Rsender, SP);
2293 #ifdef AARCH64
2294   __ sub(SP, SP, Rtemp, ex_uxtx);
2295 #else
2296   __ sub(SP, SP, Rtemp);
2297 #endif
2298 #ifndef AARCH64
2299   //  was: __ ldr(FP, Address(FP)); see the explanation in generate_deopt_blob()
2300   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2301 #endif // !AARCH64
2302 
2303   // Push interpreter frames in a loop
2304   Label loop;
2305   __ bind(loop);
2306   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2307   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2308 
2309   __ raw_push(FP, LR);                                     // create new frame
2310   __ mov(FP, SP);
2311   __ sub(Rtemp, Rtemp, 2*wordSize);
2312 
2313 #ifdef AARCH64
2314   __ sub(SP, SP, Rtemp, ex_uxtx);
2315 #else
2316   __ sub(SP, SP, Rtemp);
2317 #endif // AARCH64
2318 
2319   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2320 #ifdef AARCH64
2321   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2322 #else
2323   __ mov(LR, 0);
2324   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2325 #endif // AARCH64
2326   __ subs(R8, R8, 1);                               // decrement counter
2327   __ mov(Rsender, SP);
2328   __ b(loop, ne);
2329 
2330   // Re-push self-frame
2331   __ ldr(LR, Address(R2));
2332   __ raw_push(FP, LR);
2333   __ mov(FP, SP);
2334 
2335   // Call unpack_frames with proper arguments
2336   __ mov(R0, Rthread);
2337   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2338   __ set_last_Java_frame(SP, FP, false, Rtemp);
2339   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2340   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2341   __ reset_last_Java_frame(Rtemp);
2342 
2343   __ mov(SP, FP);
2344 #ifdef AARCH64
2345   __ raw_pop(FP, LR);
2346   __ ret();
2347 #else
2348   __ pop(RegisterSet(FP) | RegisterSet(PC));
2349 #endif
2350 
2351   masm->flush();
2352   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2353 }
2354 
2355 #endif // COMPILER2
2356 
2357 //------------------------------generate_handler_blob------
2358 //
2359 // Generate a special Compile2Runtime blob that saves all registers,
2360 // sets up an oopmap, and calls the safepoint code to stop the compiled code
2361 // at a safepoint.
2362 //
2363 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2364   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2365 
2366   ResourceMark rm;
2367   CodeBuffer buffer("handler_blob", 256, 256);
2368   int frame_size_words;
2369   OopMapSet* oop_maps;
2370 
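       // For a poll at a return instruction the return address in LR is already valid;
       // otherwise the real PC is taken from JavaThread::saved_exception_pc below.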
2371   bool cause_return = (poll_type == POLL_AT_RETURN);
2372 
2373   MacroAssembler* masm = new MacroAssembler(&buffer);
2374   address start = __ pc();
2375   oop_maps = new OopMapSet();
2376 
2377   if (!cause_return) {
2378 #ifdef AARCH64
2379     __ raw_push(LR, LR);
2380 #else
2381     __ sub(SP, SP, 4); // make room for LR which may still be live
2382                        // here if we are coming from a c2 method
2383 #endif // AARCH64
2384   }
2385 
2386   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2387   if (!cause_return) {
2388     // update saved PC with correct value
2389     // need 2 steps because LR can be live in c2 method
2390     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2391     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2392   }
2393 
2394   __ mov(R0, Rthread);
2395   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2396   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2397   __ call(call_ptr);
2398   if (pc_offset == -1) {
2399     pc_offset = __ offset();
2400   }
2401   oop_maps->add_gc_map(pc_offset, map);
2402   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2403 
2404   // Check for pending exception
2405   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2406   __ cmp(Rtemp, 0);
2407 
2408 #ifdef AARCH64
2409   RegisterSaver::restore_live_registers(masm, cause_return);
2410   Register ret_addr = cause_return ? LR : Rtemp;
2411   if (!cause_return) {
2412     __ raw_pop(FP, ret_addr);
2413   }
2414 
2415   Label throw_exception;
2416   __ b(throw_exception, ne);
2417   __ br(ret_addr);
2418 
2419   __ bind(throw_exception);
2420   __ mov(Rexception_pc, ret_addr);
2421 #else // AARCH64
2422   if (!cause_return) {
2423     RegisterSaver::restore_live_registers(masm, false);
2424     __ pop(PC, eq);
2425     __ pop(Rexception_pc);
2426   } else {
2427     RegisterSaver::restore_live_registers(masm);
2428     __ bx(LR, eq);
2429     __ mov(Rexception_pc, LR);
2430   }
2431 #endif // AARCH64
2432 
2433   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2434 
2435   __ flush();
2436 
2437   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2438 }
2439 
2440 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2441   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2442 
2443   ResourceMark rm;
2444   CodeBuffer buffer(name, 1000, 512);
2445   int frame_size_words;
2446   OopMapSet *oop_maps;
2447   int frame_complete;
2448 
2449   MacroAssembler* masm = new MacroAssembler(&buffer);
2450   Label pending_exception;
2451 
2452   int start = __ offset();
2453 
2454   oop_maps = new OopMapSet();
2455   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
2456 
2457   frame_complete = __ offset();
2458 
2459   __ mov(R0, Rthread);
2460 
2461   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2462   assert(start == 0, "warning: start differs from code_begin");
2463   __ call(destination);
2464   if (pc_offset == -1) {
2465     pc_offset = __ offset();
2466   }
2467   oop_maps->add_gc_map(pc_offset, map);
2468   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2469 
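       // If the VM call left a pending exception, restore the saved registers and
       // forward the exception instead of jumping to the resolved target.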
2470   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
2471   __ cbnz(R1, pending_exception);
2472 
2473   // Overwrite saved register values
2474 
2475   // Place metadata result of VM call into Rmethod
2476   __ get_vm_result_2(R1, Rtemp);
2477   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
2478 
2479   // Place target address (VM call result) into Rtemp
2480   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
2481 
2482   RegisterSaver::restore_live_registers(masm);
2483   __ jump(Rtemp);
2484 
2485   __ bind(pending_exception);
2486 
2487   RegisterSaver::restore_live_registers(masm);
2488   const Register Rzero = __ zero_register(Rtemp);
2489   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
2490   __ mov(Rexception_pc, LR);
2491   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2492 
2493   __ flush();
2494 
2495   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2496 }