src/hotspot/cpu/arm/sharedRuntime_arm.cpp
  45 
  46 #define __ masm->
  47 
  48 class RegisterSaver {
  49 public:
  50 
  51   // Special registers:
  52   //              32-bit ARM     64-bit ARM
  53   //  Rthread:       R10            R28
  54   //  LR:            R14            R30
  55 
  56   // Rthread is callee saved in the C ABI and never changed by compiled code:
  57   // no need to save it.
  58 
  59   // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  60   // The one at LR_offset is a return address that is needed by stack walking.
  61   // A c2 method uses LR as a standard register so it may be live when we
  62   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  63   // in case it's live in the method we are coming from.
  64 
  65 #ifdef AARCH64
  66 
  67   //
  68   // On AArch64 the register save area has the following layout:
  69   //
  70   // |---------------------|
  71   // | return address (LR) |
  72   // | FP                  |
  73   // |---------------------|
  74   // | V31                 |
  75   // | ...                 |
  76   // | V0                  |
  77   // |---------------------|
  78   // | padding             |
  79   // | R30 (LR live value) |
  80   // |---------------------|
  81   // | R27                 |
  82   // | ...                 |
  83   // | R0                  |
  84   // |---------------------| <-- SP
  85   //
  86 
  87   enum RegisterLayout {
  88     number_of_saved_gprs = 28,
  89     number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
  90     words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
  91 
  92     R0_offset  = 0,
  93     R30_offset = R0_offset + number_of_saved_gprs,
  94     D0_offset  = R30_offset + 2,
  95     FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
  96     LR_offset  = FP_offset + 1,
  97 
  98     reg_save_size = LR_offset + 1,
  99   };
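       // For illustration (a sketch): assuming number_of_saved_fprs == 32 and
       // words_per_fpr == 2 (each SIMD register saved as a 128-bit quad), the
       // offsets above evaluate to R0_offset = 0, R30_offset = 28, D0_offset = 30,
       // FP_offset = 30 + 32*2 = 94, LR_offset = 95 and reg_save_size = 96, so
       // reg_save_size * wordSize (96 * 8 = 768 bytes) remains a multiple of the
       // 16-byte stack alignment that save_live_registers asserts below.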
 100 
 101   static const int Rmethod_offset;
 102   static const int Rtemp_offset;
 103 
 104 #else
 105 
 106   enum RegisterLayout {
 107     fpu_save_size = FloatRegisterImpl::number_of_registers,
 108 #ifndef __SOFTFP__
 109     D0_offset = 0,
 110 #endif
 111     R0_offset = fpu_save_size,
 112     R1_offset,
 113     R2_offset,
 114     R3_offset,
 115     R4_offset,
 116     R5_offset,
 117     R6_offset,
 118 #if (FP_REG_NUM != 7)
 119     // if not saved as FP
 120     R7_offset,
 121 #endif
 122     R8_offset,
 123     R9_offset,
 124 #if (FP_REG_NUM != 11)
 125     // if not saved as FP
 126     R11_offset,
 127 #endif
 128     R12_offset,
 129     R14_offset,
 130     FP_offset,
 131     LR_offset,
 132     reg_save_size,
 133 
 134     Rmethod_offset = R9_offset,
 135     Rtemp_offset = R12_offset,
 136   };
 137 
 138   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
 139   // (altFP_7_11 is the one among R7 and R11 which is not FP)
 140 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 141 
 142 #endif // AARCH64
 143 
 144   //  When LR may be live in the nmethod from which we are coming,
 145   //  lr_saved is true: the caller saves the return address on the stack
 146   //  before the call to save_live_registers and LR still contains the
 147   //  live value.
 148 
 149   static OopMap* save_live_registers(MacroAssembler* masm,
 150                                      int* total_frame_words,
 151                                      bool lr_saved = false);
 152   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 153 
 154 };
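     // A minimal usage sketch (the local names "frame_words" and "map" are
     // illustrative, not part of this file): a blob generator brackets a call into
     // the runtime with the pair of helpers declared above and hands the returned
     // OopMap to its OopMapSet, so the GC can locate oops in the register save area
     // while the runtime call is in progress.
     //
     //   int frame_words;
     //   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_words);
     //   ... emit the call into the runtime ...
     //   RegisterSaver::restore_live_registers(masm);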
 155 
 156 
 157 #ifdef AARCH64
 158 const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
 159 const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
 160 #endif // AARCH64
 161 
 162 
 163 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 164                                            int* total_frame_words,
 165                                            bool lr_saved) {
 166   *total_frame_words = reg_save_size;
 167 
 168   OopMapSet *oop_maps = new OopMapSet();
 169   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 170 
 171 #ifdef AARCH64
 172   assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
 173 
 174   if (lr_saved) {
 175     // LR was stashed here, so that jump could use it as a scratch reg
 176     __ ldr(LR, Address(SP, 0));
 177     // There are two words on the stack top:
 178     //  [SP + 0]: placeholder for FP
 179     //  [SP + wordSize]: saved return address
 180     __ str(FP, Address(SP, 0));
 181   } else {
 182     __ raw_push(FP, LR);
 183   }
 184 
 185   __ sub(SP, SP, (reg_save_size - 2) * wordSize);
 186 
 187   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 188     int offset = R0_offset + i;
 189     __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
 190     map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
 191     map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
 192   }
 193 
 194   __ str(R30, Address(SP, R30_offset * wordSize));
 195   map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
 196 
 197   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 198     int offset1 = D0_offset + i * words_per_fpr;
 199     int offset2 = offset1 + words_per_fpr;
 200     Address base(SP, offset1 * wordSize);
 201     if (words_per_fpr == 2) {
 202       // pair of "wide" quad vector registers
 203       __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 204     } else {
 205       // pair of double vector registers
 206       __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 207     }
 208     map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
 209     map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
 210   }
 211 #else
 212   if (lr_saved) {
 213     __ push(RegisterSet(FP));
 214   } else {
 215     __ push(RegisterSet(FP) | RegisterSet(LR));
 216   }
 217   __ push(SAVED_BASE_REGS);
 218   if (HaveVFP) {
 219     if (VM_Version::has_vfp3_32()) {
 220       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 221     } else {
 222       if (FloatRegisterImpl::number_of_registers > 32) {
 223         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 224         __ sub(SP, SP, 32 * wordSize);
 225       }
 226     }
 227     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 228   } else {
 229     __ sub(SP, SP, fpu_save_size * wordSize);
 230   }
 231 


 235     if (j == FP_REG_NUM) {
 236       // skip the FP register, managed below.
 237       j++;
 238     }
 239     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 240     j++;
 241   }
 242   assert(j == R10->encoding(), "must be");
 243 #if (FP_REG_NUM != 11)
 244   // add R11, if not managed as FP
 245   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 246 #endif
 247   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 248   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 249   if (HaveVFP) {
 250     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 251       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 252       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 253     }
 254   }
 255 #endif // AARCH64
 256 
 257   return map;
 258 }
 259 
 260 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 261 #ifdef AARCH64
 262   for (int i = 0; i < number_of_saved_gprs; i += 2) {
 263     __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
 264   }
 265 
 266   __ ldr(R30, Address(SP, R30_offset * wordSize));
 267 
 268   for (int i = 0; i < number_of_saved_fprs; i += 2) {
 269     Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
 270     if (words_per_fpr == 2) {
 271       // pair of "wide" quad vector registers
 272       __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
 273     } else {
 274       // pair of double vector registers
 275       __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
 276     }
 277   }
 278 
 279   __ add(SP, SP, (reg_save_size - 2) * wordSize);
 280 
 281   if (restore_lr) {
 282     __ raw_pop(FP, LR);
 283   } else {
 284     __ ldr(FP, Address(SP, 0));
 285   }
 286 #else
 287   if (HaveVFP) {
 288     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 289     if (VM_Version::has_vfp3_32()) {
 290       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 291     } else {
 292       if (FloatRegisterImpl::number_of_registers > 32) {
 293         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 294         __ add(SP, SP, 32 * wordSize);
 295       }
 296     }
 297   } else {
 298     __ add(SP, SP, fpu_save_size * wordSize);
 299   }
 300   __ pop(SAVED_BASE_REGS);
 301   if (restore_lr) {
 302     __ pop(RegisterSet(FP) | RegisterSet(LR));
 303   } else {
 304     __ pop(RegisterSet(FP));
 305   }
 306 #endif // AARCH64
 307 }
 308 
 309 #ifdef AARCH64
 310 
 311 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 312   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 313     __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
 314   } else {
 315     __ raw_push(R0, ZR);
 316   }
 317 }
 318 
 319 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 320   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 321     __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
 322   } else {
 323     __ raw_pop(R0, ZR);
 324   }
 325 }
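     // These helpers preserve the Java-visible return value (R0 for integral and
     // reference results, D0 for float/double) across runtime calls made on the
     // native wrapper's slow paths below, e.g. around
     // check_special_condition_for_native_trans and reguard_yellow_pages.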
 326 
 327 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 328   __ raw_push(R0, R1);
 329   __ raw_push(R2, R3);
 330   __ raw_push(R4, R5);
 331   __ raw_push(R6, R7);
 332 
 333   assert(FPR_PARAMS == 8, "adjust this code");
 334   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 335 
 336   if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
 337   if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
 338   if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
 339   if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
 340 }
 341 
 342 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 343   assert(FPR_PARAMS == 8, "adjust this code");
 344   assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
 345 
 346   if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
 347   if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
 348   if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
 349   if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
 350 
 351   __ raw_pop(R6, R7);
 352   __ raw_pop(R4, R5);
 353   __ raw_pop(R2, R3);
 354   __ raw_pop(R0, R1);
 355 }
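     // The pops above mirror push_param_registers exactly in reverse, and both
     // helpers always move SP in 16-byte steps (pairs of 8-byte registers), so the
     // AArch64 stack alignment requirement is preserved around the call they
     // bracket.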
 356 
 357 #else // AARCH64
 358 
 359 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 360 #ifdef __ABI_HARD__
 361   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 362     __ sub(SP, SP, 8);
 363     __ fstd(D0, Address(SP));
 364     return;
 365   }
 366 #endif // __ABI_HARD__
 367   __ raw_push(R0, R1);
 368 }
 369 
 370 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 371 #ifdef __ABI_HARD__
 372   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 373     __ fldd(D0, Address(SP));
 374     __ add(SP, SP, 8);
 375     return;
 376   }
 377 #endif // __ABI_HARD__


 388   // but there is no way to guarantee that
 389   if (fp_regs_in_arguments) {
 390     // convert fp_regs_in_arguments to a number of double registers
 391     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 392     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 393   }
 394 #endif // __ABI_HARD__
 395 }
 396 
 397 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 398 #ifdef __ABI_HARD__
 399   if (fp_regs_in_arguments) {
 400     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 401     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 402   }
 403 #endif // __ABI_HARD__
 404 
 405   __ pop(RegisterSet(R0, R3));
 406 }
 407 
 408 #endif // AARCH64
 409 
 410 
 411 // Is the vector's size (in bytes) bigger than the size saved by default?
 412 // All vector registers are saved by default on ARM.
 413 bool SharedRuntime::is_wide_vector(int size) {
 414   return false;
 415 }
 416 
 417 size_t SharedRuntime::trampoline_size() {
 418   return 16;
 419 }
 420 
 421 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 422   InlinedAddress dest(destination);
 423   __ indirect_jump(dest, Rtemp);
 424   __ bind_literal(dest);
 425 }
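     // The 16 bytes returned by trampoline_size() budget enough room for what
     // generate_trampoline emits: the literal-load / indirect-branch sequence plus
     // the destination address that bind_literal() places inline after it (4 bytes
     // on 32-bit ARM, 8 bytes on AArch64).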
 426 
 427 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 428                                         VMRegPair *regs,
 429                                         VMRegPair *regs2,
 430                                         int total_args_passed) {
 431   assert(regs2 == NULL, "not needed on arm");
 432 #ifdef AARCH64
 433   int slot = 0; // counted in 32-bit VMReg slots
 434   int reg = 0;
 435   int fp_reg = 0;
 436   for (int i = 0; i < total_args_passed; i++) {
 437     switch (sig_bt[i]) {
 438     case T_SHORT:
 439     case T_CHAR:
 440     case T_BYTE:
 441     case T_BOOLEAN:
 442     case T_INT:
 443       if (reg < GPR_PARAMS) {
 444         Register r = as_Register(reg);
 445         regs[i].set1(r->as_VMReg());
 446         reg++;
 447       } else {
 448         regs[i].set1(VMRegImpl::stack2reg(slot));
 449         slot+=2;
 450       }
 451       break;
 452     case T_LONG:
 453       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 454       // fall through
 455     case T_ARRAY:
 456     case T_OBJECT:
 457     case T_ADDRESS:
 458       if (reg < GPR_PARAMS) {
 459         Register r = as_Register(reg);
 460         regs[i].set2(r->as_VMReg());
 461         reg++;
 462       } else {
 463         regs[i].set2(VMRegImpl::stack2reg(slot));
 464         slot+=2;
 465       }
 466       break;
 467     case T_FLOAT:
 468       if (fp_reg < FPR_PARAMS) {
 469         FloatRegister r = as_FloatRegister(fp_reg);
 470         regs[i].set1(r->as_VMReg());
 471         fp_reg++;
 472       } else {
 473         regs[i].set1(VMRegImpl::stack2reg(slot));
 474         slot+=2;
 475       }
 476       break;
 477     case T_DOUBLE:
 478       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 479       if (fp_reg < FPR_PARAMS) {
 480         FloatRegister r = as_FloatRegister(fp_reg);
 481         regs[i].set2(r->as_VMReg());
 482         fp_reg++;
 483       } else {
 484         regs[i].set2(VMRegImpl::stack2reg(slot));
 485         slot+=2;
 486       }
 487       break;
 488     case T_VOID:
 489       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 490       regs[i].set_bad();
 491       break;
 492     default:
 493       ShouldNotReachHere();
 494     }
 495   }
 496   return slot;
 497 
 498 #else // AARCH64
 499 
 500   int slot = 0;
 501   int ireg = 0;
 502 #ifdef __ABI_HARD__
 503   int fp_slot = 0;
 504   int single_fpr_slot = 0;
 505 #endif // __ABI_HARD__
 506   for (int i = 0; i < total_args_passed; i++) {
 507     switch (sig_bt[i]) {
 508     case T_SHORT:
 509     case T_CHAR:
 510     case T_BYTE:
 511     case T_BOOLEAN:
 512     case T_INT:
 513     case T_ARRAY:
 514     case T_OBJECT:
 515     case T_ADDRESS:
 516     case T_METADATA:
 517 #ifndef __ABI_HARD__
 518     case T_FLOAT:


 575     case T_DOUBLE:
 576       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 577       if (fp_slot <= 14) {
 578         FloatRegister r1 = as_FloatRegister(fp_slot);
 579         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 580         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 581         fp_slot += 2;
 582       } else {
 583         if(slot & 1) slot++;
 584         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 585         slot += 2;
 586         single_fpr_slot = 16;
 587       }
 588       break;
 589 #endif // __ABI_HARD__
 590     default:
 591       ShouldNotReachHere();
 592     }
 593   }
 594   return slot;
 595 #endif // AARCH64
 596 }
 597 
 598 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 599                                            VMRegPair *regs,
 600                                            int total_args_passed,
 601                                            int is_outgoing) {
 602 #ifdef AARCH64
 603   // C calling convention on AArch64 is good enough.
 604   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 605 #else
 606 #ifdef __SOFTFP__
 607   // soft float is the same as the C calling convention.
 608   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 609 #endif // __SOFTFP__
 610   (void) is_outgoing;
 611   int slot = 0;
 612   int ireg = 0;
 613   int freg = 0;
 614   int single_fpr = 0;
 615 
 616   for (int i = 0; i < total_args_passed; i++) {
 617     switch (sig_bt[i]) {
 618     case T_SHORT:
 619     case T_CHAR:
 620     case T_BYTE:
 621     case T_BOOLEAN:
 622     case T_INT:
 623     case T_ARRAY:
 624     case T_OBJECT:
 625     case T_ADDRESS:


 668         Register r2 = as_Register(ireg + 1);
 669         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 670         ireg += 2;
 671       } else {
 672         if (slot & 1) slot++;
 673         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 674         slot += 2;
 675         ireg = 4;
 676       }
 677       break;
 678     case T_VOID:
 679       regs[i].set_bad();
 680       break;
 681     default:
 682       ShouldNotReachHere();
 683     }
 684   }
 685 
 686   if (slot & 1) slot++;
 687   return slot;
 688 #endif // AARCH64
 689 }
 690 
 691 static void patch_callers_callsite(MacroAssembler *masm) {
 692   Label skip;
 693 
 694   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 695   __ cbz(Rtemp, skip);
 696 
 697 #ifdef AARCH64
 698   push_param_registers(masm, FPR_PARAMS);
 699   __ raw_push(LR, ZR);
 700 #else
 701   // Pushing an even number of registers for stack alignment.
 702   // Selecting R9, which had to be saved anyway for some platforms.
 703   __ push(RegisterSet(R0, R3) | R9 | LR);
 704 #endif // AARCH64
 705 
 706   __ mov(R0, Rmethod);
 707   __ mov(R1, LR);
 708   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 709 
 710 #ifdef AARCH64
 711   __ raw_pop(LR, ZR);
 712   pop_param_registers(masm, FPR_PARAMS);
 713 #else
 714   __ pop(RegisterSet(R0, R3) | R9 | LR);
 715 #endif // AARCH64
 716 
 717   __ bind(skip);
 718 }
 719 
 720 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 721                                     int total_args_passed, int comp_args_on_stack,
 722                                     const BasicType *sig_bt, const VMRegPair *regs) {
 723   // TODO: ARM - Maybe we can use ldm to load the arguments
 724   const Register tmp = Rtemp; // avoid erasing R5_mh
 725 
 726   // The next assert may not be needed, but it is safer. Extra analysis is required
 727   // if there are not enough free registers and we need to use R5 here.
 728   assert_different_registers(tmp, R5_mh);
 729 
 730   // 6243940 We might end up in handle_wrong_method if
 731   // the callee is deoptimized as we race thru here. If that
 732   // happens we don't want to take a safepoint because the
 733   // caller frame will look interpreted and arguments are now
 734   // "compiled" so it is much better to make this transition
 735   // invisible to the stack walking code. Unfortunately if
 736   // we try and find the callee by normal means a safepoint
 737   // is possible. So we stash the desired callee in the thread
 738   // and the VM will find it there should this case occur.
 739   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 740   __ str(Rmethod, callee_target_addr);
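       // (The stashed Method* is read back on the VM side via
       // JavaThread::callee_target, in SharedRuntime::handle_wrong_method.)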
 741 
 742 #ifdef AARCH64
 743 
 744   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
 745   assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
 746 
 747   if (comp_args_on_stack) {
 748     __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
 749   }
 750 
 751   for (int i = 0; i < total_args_passed; i++) {
 752     if (sig_bt[i] == T_VOID) {
 753       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 754       continue;
 755     }
 756     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 757 
 758     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 759     Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 760 
 761     VMReg r = regs[i].first();
 762     bool full_word = regs[i].second()->is_valid();
 763 
 764     if (r->is_stack()) {
 765       if (full_word) {
 766         __ ldr(tmp, source_addr);
 767         __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 768       } else {
 769         __ ldr_w(tmp, source_addr);
 770         __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 771       }
 772     } else if (r->is_Register()) {
 773       if (full_word) {
 774         __ ldr(r->as_Register(), source_addr);
 775       } else {
 776         __ ldr_w(r->as_Register(), source_addr);
 777       }
 778     } else if (r->is_FloatRegister()) {
 779       if (sig_bt[i] == T_DOUBLE) {
 780         __ ldr_d(r->as_FloatRegister(), source_addr);
 781       } else {
 782         __ ldr_s(r->as_FloatRegister(), source_addr);
 783       }
 784     } else {
 785       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 786     }
 787   }
 788 
 789   __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
 790   __ br(tmp);
 791 
 792 #else
 793 
 794   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 795 
 796   const Register initial_sp = Rmethod; // temporarily scratched
 797 
 798   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 799   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 800 
 801   __ mov(initial_sp, SP);
 802 
 803   if (comp_args_on_stack) {
 804     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 805   }
 806   __ bic(SP, SP, StackAlignmentInBytes - 1);
 807 
 808   for (int i = 0; i < total_args_passed; i++) {
 809     if (sig_bt[i] == T_VOID) {
 810       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 811       continue;
 812     }


 834         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 835       }
 836     } else if (r_1->is_FloatRegister()) {
 837 #ifdef __SOFTFP__
 838       ShouldNotReachHere();
 839 #endif // __SOFTFP__
 840       if (!r_2->is_valid()) {
 841         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 842       } else {
 843         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 844       }
 845     } else {
 846       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 847     }
 848   }
 849 
 850   // restore Rmethod (scratched for initial_sp)
 851   __ ldr(Rmethod, callee_target_addr);
 852   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 853 
 854 #endif // AARCH64
 855 }
 856 
 857 static void gen_c2i_adapter(MacroAssembler *masm,
 858                             int total_args_passed,  int comp_args_on_stack,
 859                             const BasicType *sig_bt, const VMRegPair *regs,
 860                             Label& skip_fixup) {
 861   // TODO: ARM - Maybe we can use stm to deoptimize the arguments
 862   const Register tmp = Rtemp;
 863 
 864   patch_callers_callsite(masm);
 865   __ bind(skip_fixup);
 866 
 867   __ mov(Rsender_sp, SP); // not yet saved
 868 
 869 #ifdef AARCH64
 870 
 871   int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
 872   if (extraspace) {
 873     __ sub(SP, SP, extraspace);
 874   }
 875 
 876   for (int i = 0; i < total_args_passed; i++) {
 877     if (sig_bt[i] == T_VOID) {
 878       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 879       continue;
 880     }
 881 
 882     int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
 883     Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
 884 
 885     VMReg r = regs[i].first();
 886     bool full_word = regs[i].second()->is_valid();
 887 
 888     if (r->is_stack()) {
 889       if (full_word) {
 890         __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 891         __ str(tmp, dest_addr);
 892       } else {
 893         __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
 894         __ str_w(tmp, dest_addr);
 895       }
 896     } else if (r->is_Register()) {
 897       if (full_word) {
 898         __ str(r->as_Register(), dest_addr);
 899       } else {
 900         __ str_w(r->as_Register(), dest_addr);
 901       }
 902     } else if (r->is_FloatRegister()) {
 903       if (sig_bt[i] == T_DOUBLE) {
 904         __ str_d(r->as_FloatRegister(), dest_addr);
 905       } else {
 906         __ str_s(r->as_FloatRegister(), dest_addr);
 907       }
 908     } else {
 909       assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
 910     }
 911   }
 912 
 913   __ mov(Rparams, SP);
 914 
 915   __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
 916   __ br(tmp);
 917 
 918 #else
 919 
 920   int extraspace = total_args_passed * Interpreter::stackElementSize;
 921   if (extraspace) {
 922     __ sub_slow(SP, SP, extraspace);
 923   }
 924 
 925   for (int i = 0; i < total_args_passed; i++) {
 926     if (sig_bt[i] == T_VOID) {
 927       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 928       continue;
 929     }
 930     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 931 
 932     VMReg r_1 = regs[i].first();
 933     VMReg r_2 = regs[i].second();
 934     if (r_1->is_stack()) {
 935       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 936       if (!r_2->is_valid()) {
 937         __ ldr(tmp, Address(SP, arg_offset));
 938         __ str(tmp, Address(SP, stack_offset));


 948       } else {
 949         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 950         __ str(r_2->as_Register(), Address(SP, stack_offset));
 951       }
 952     } else if (r_1->is_FloatRegister()) {
 953 #ifdef __SOFTFP__
 954       ShouldNotReachHere();
 955 #endif // __SOFTFP__
 956       if (!r_2->is_valid()) {
 957         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 958       } else {
 959         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 960       }
 961     } else {
 962       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 963     }
 964   }
 965 
 966   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 967 
 968 #endif // AARCH64
 969 }
 970 
 971 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 972                                                             int total_args_passed,
 973                                                             int comp_args_on_stack,
 974                                                             const BasicType *sig_bt,
 975                                                             const VMRegPair *regs,
 976                                                             AdapterFingerPrint* fingerprint) {
 977   address i2c_entry = __ pc();
 978   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 979 
 980   address c2i_unverified_entry = __ pc();
 981   Label skip_fixup;
 982   const Register receiver       = R0;
 983   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 984   const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
 985 
 986   __ load_klass(receiver_klass, receiver);
 987   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 988   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 989   __ cmp(receiver_klass, holder_klass);
 990 
 991 #ifdef AARCH64
 992   Label ic_miss;
 993   __ b(ic_miss, ne);
 994   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 995   __ cbz(Rtemp, skip_fixup);
 996   __ bind(ic_miss);
 997   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 998 #else
 999   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
1000   __ cmp(Rtemp, 0, eq);
1001   __ b(skip_fixup, eq);
1002   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
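       // On 32-bit ARM this relies on conditional execution rather than branches:
       // only when the klass compare above set 'eq' is Method::code() loaded and
       // compared against 0; if it is NULL we branch to skip_fixup and fall into
       // the c2i entry, otherwise (and on a klass mismatch, which left 'ne') we
       // jump to the ic-miss stub so the call site can be re-resolved.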
1003 #endif // AARCH64
1004 
1005   address c2i_entry = __ pc();
1006   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1007 
1008   __ flush();
1009   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1010 }
1011 
1012 
1013 static int reg2offset_in(VMReg r) {
1014   // Account for saved FP and LR
1015   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1016 }
1017 
1018 static int reg2offset_out(VMReg r) {
1019   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1020 }
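     // For illustration: VMRegImpl::stack_slot_size is 4 bytes, so an incoming
     // stack argument in slot n is read at FP + n*4 + 2*wordSize (skipping the
     // saved FP/LR pair), while an outgoing slot n is written at SP + n*4, since
     // out_preserve_stack_slots() returns 0 on this platform (see below).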
1021 
1022 
1023 static void verify_oop_args(MacroAssembler* masm,


1184   int lock_slot_fp_offset = stack_size - 2 * wordSize -
1185     lock_slot_offset * VMRegImpl::stack_slot_size;
1186 
1187   // Unverified entry point
1188   address start = __ pc();
1189 
1190   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1191   const Register receiver = R0; // see receiverOpr()
1192   __ load_klass(Rtemp, receiver);
1193   __ cmp(Rtemp, Ricklass);
1194   Label verified;
1195 
1196   __ b(verified, eq); // jump over alignment no-ops too
1197   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1198   __ align(CodeEntryAlignment);
1199 
1200   // Verified entry point
1201   __ bind(verified);
1202   int vep_offset = __ pc() - start;
1203 
1204 #ifdef AARCH64
1205   // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1206   __ nop();
1207 #endif // AARCH64
1208 
1209   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
1210     // Object.hashCode and System.identityHashCode can pull the hash code from the
1211     // header word instead of doing a full VM transition once it has been computed.
1212     Label slow_case;
1213     const Register obj_reg = R0;
1214 
1216     // Unlike Object.hashCode, System.identityHashCode is a static method and
1217     // gets the object as an argument instead of the receiver.
1217     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1218       assert(method->is_static(), "method should be static");
1219       // return 0 for null reference input, return val = R0 = obj_reg = 0
1220 #ifdef AARCH64
1221       Label Continue;
1222       __ cbnz(obj_reg, Continue);
1223       __ ret();
1224       __ bind(Continue);
1225 #else
1226       __ cmp(obj_reg, 0);
1227       __ bx(LR, eq);
1228 #endif
1229     }
1230 
1231     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1232 
1233     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1234     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1235 
1236     if (UseBiasedLocking) {
1237       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1238       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1239     }
1240 
1241 #ifdef AARCH64
1242     __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1243     __ b(slow_case, eq);
1244     __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1245     __ ret();
1246 #else
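         // bics keeps only the hash bits of the mark word and sets the flags: if
         // they are all zero the hash has not been installed yet and we fall
         // through to slow_case; otherwise the hash is shifted into R0 and we
         // return (both instructions conditional on 'ne').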
1247     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1248     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1249     __ bx(LR, ne);
1250 #endif // AARCH64
1251 
1252     __ bind(slow_case);
1253   }
1254 
1255   // Bang stack pages
1256   __ arm_stack_overflow_check(stack_size, Rtemp);
1257 
1258   // Setup frame linkage
1259   __ raw_push(FP, LR);
1260   __ mov(FP, SP);
1261   __ sub_slow(SP, SP, stack_size - 2*wordSize);
1262 
1263   int frame_complete = __ pc() - start;
1264 
1265   OopMapSet* oop_maps = new OopMapSet();
1266   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1267   const int extra_args = is_static ? 2 : 1;
1268   int receiver_offset = -1;
1269   int fp_regs_in_arguments = 0;
1270 
1271   for (i = total_in_args; --i >= 0; ) {
1272     switch (in_sig_bt[i]) {
1273     case T_ARRAY:
1274     case T_OBJECT: {
1275       VMReg src = in_regs[i].first();
1276       VMReg dst = out_regs[i + extra_args].first();
1277       if (src->is_stack()) {
1278         assert(dst->is_stack(), "must be");
1279         assert(i != 0, "Incoming receiver is always in a register");
1280         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1281         __ cmp(Rtemp, 0);
1282 #ifdef AARCH64
1283         __ add(Rtemp, FP, reg2offset_in(src));
1284         __ csel(Rtemp, ZR, Rtemp, eq);
1285 #else
1286         __ add(Rtemp, FP, reg2offset_in(src), ne);
1287 #endif // AARCH64
1288         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1289         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1290         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1291       } else {
1292         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1293         __ str(src->as_Register(), Address(SP, offset));
1294         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1295         if ((i == 0) && (!is_static)) {
1296           receiver_offset = offset;
1297         }
1298         oop_handle_offset += VMRegImpl::slots_per_word;
1299 
1300 #ifdef AARCH64
1301         __ cmp(src->as_Register(), 0);
1302         __ add(Rtemp, SP, offset);
1303         __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1304         if (dst->is_stack()) {
1305           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1306         }
1307 #else
1308         if (dst->is_stack()) {
1309           __ movs(Rtemp, src->as_Register());
1310           __ add(Rtemp, SP, offset, ne);
1311           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1312         } else {
1313           __ movs(dst->as_Register(), src->as_Register());
1314           __ add(dst->as_Register(), SP, offset, ne);
1315         }
1316 #endif // AARCH64
1317       }
1318     }
1319 
1320     case T_VOID:
1321       break;
1322 
1323 #ifdef AARCH64
1324     case T_FLOAT:
1325     case T_DOUBLE: {
1326       VMReg src = in_regs[i].first();
1327       VMReg dst = out_regs[i + extra_args].first();
1328       if (src->is_stack()) {
1329         assert(dst->is_stack(), "must be");
1330         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1331         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1332       } else {
1333         assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1334         assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1335         fp_regs_in_arguments++;
1336       }
1337       break;
1338     }
1339 #else // AARCH64
1340 
1341 #ifdef __SOFTFP__
1342     case T_DOUBLE:
1343 #endif
1344     case T_LONG: {
1345       VMReg src_1 = in_regs[i].first();
1346       VMReg src_2 = in_regs[i].second();
1347       VMReg dst_1 = out_regs[i + extra_args].first();
1348       VMReg dst_2 = out_regs[i + extra_args].second();
1349 #if (ALIGN_WIDE_ARGUMENTS == 0)
1350       // The C convention can mix a register and a stack slot for a
1351       // 64-bit native argument.
1352 
1353       // Note: the following code should work independently of whether
1354       // the Java calling convention follows the C convention or whether
1355       // it aligns 64-bit values.
1356       if (dst_2->is_Register()) {
1357         if (src_1->as_Register() != dst_1->as_Register()) {
1358           assert(src_1->as_Register() != dst_2->as_Register() &&
1359                  src_2->as_Register() != dst_2->as_Register(), "must be");


1492       if (src_1->is_stack()) {
1493         if (dst_1->is_stack()) {
1494           assert(dst_2->is_stack(), "must be");
1495           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1496           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1497           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1498           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1499         } else {
1500           // C2 Java calling convention does not populate S14 and S15, therefore
1501           // those need to be loaded from stack here
1502           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1503           fp_regs_in_arguments += 2;
1504         }
1505       } else {
1506         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1507         fp_regs_in_arguments += 2;
1508       }
1509       break;
1510     }
1511 #endif // __ABI_HARD__
1512 #endif // AARCH64
1513 
1514     default: {
1515       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1516       VMReg src = in_regs[i].first();
1517       VMReg dst = out_regs[i + extra_args].first();
1518       if (src->is_stack()) {
1519         assert(dst->is_stack(), "must be");
1520         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1521         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1522       } else if (dst->is_stack()) {
1523         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1524       } else {
1525         assert(src->is_Register() && dst->is_Register(), "must be");
1526         __ mov(dst->as_Register(), src->as_Register());
1527       }
1528     }
1529     }
1530   }
1531 
1532   // Get Klass mirror
1533   int klass_offset = -1;
1534   if (is_static) {
1535     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1536     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1537     __ add(c_rarg1, SP, klass_offset);
1538     __ str(Rtemp, Address(SP, klass_offset));
1539     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1540   }
1541 
1542   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1543   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1544   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1545   oop_maps->add_gc_map(pc_offset, map);
1546 
1547 #ifndef AARCH64
1548   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1549   __ membar(MacroAssembler::StoreStore, Rtemp);
1550 #endif // !AARCH64
1551 
1552   // RedefineClasses() tracing support for obsolete method entry
1553   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1554 #ifdef AARCH64
1555     __ NOT_TESTED();
1556 #endif
1557     __ save_caller_save_registers();
1558     __ mov(R0, Rthread);
1559     __ mov_metadata(R1, method());
1560     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1561     __ restore_caller_save_registers();
1562   }
1563 
1564   const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1565   const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1566   const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1567   const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
1568 
1569   Label slow_lock, slow_lock_biased, lock_done, fast_lock;
1570   if (method->is_synchronized()) {
1571     // The first argument is a handle to sync object (a class or an instance)
1572     __ ldr(sync_obj, Address(R1));
1573     // Remember the handle for the unlocking code
1574     __ mov(sync_handle, R1);
1575 
1576     if(UseBiasedLocking) {
1577       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1578     }
1579 
1580     const Register mark = tmp;
1581 #ifdef AARCH64
1582     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1583     assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1584 
1585     __ ldr(mark, sync_obj);
1586 
1587     // Test if object is already locked
1588     assert(markOopDesc::unlocked_value == 1, "adjust this code");
1589     __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1590 
1591     // Check for recursive lock
1592     // See comments in InterpreterMacroAssembler::lock_object for
1593     // explanations on the fast recursive locking check.
1594     __ mov(Rtemp, SP);
1595     __ sub(Rtemp, mark, Rtemp);
1596     intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1597     Assembler::LogicalImmediate imm(mask, false);
1598     __ ands(Rtemp, Rtemp, imm);
1599     __ b(slow_lock, ne);
1600 
1601     // Recursive locking: store 0 into a lock record
1602     __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1603     __ b(lock_done);
1604 
1605     __ bind(fast_lock);
1606     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1607 
1608     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1609 #else
1610     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1611     // That would be acceptable as either the CAS or the slow case path is taken in that case.
1612 
1613     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1614     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1615     __ tst(mark, markOopDesc::unlocked_value);
1616     __ b(fast_lock, ne);
1617 
1618     // Check for recursive lock
1619     // See comments in InterpreterMacroAssembler::lock_object for
1620     // explanations on the fast recursive locking check.
1621     // Check independently the low bits and the distance to SP
1622     // -1- test low 2 bits
1623     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1624     // -2- test (hdr - SP) if the low two bits are 0
1625     __ sub(Rtemp, mark, SP, eq);
1626     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1627     // If still 'eq' then recursive locking OK: set displaced header to 0
1628     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1629     __ b(lock_done, eq);
1630     __ b(slow_lock);
1631 
1632     __ bind(fast_lock);
1633     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1634 
1635     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1636 #endif // AARCH64
1637 
1638     __ bind(lock_done);
1639   }
1640 
1641   // Get JNIEnv*
1642   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1643 
1644   // Perform thread state transition
1645   __ mov(Rtemp, _thread_in_native);
1646 #ifdef AARCH64
1647   // stlr instruction is used to force all preceding writes to be observed prior to thread state change
1648   __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1649   __ stlr_w(Rtemp, Rtemp2);
1650 #else
1651   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1652 #endif // AARCH64
1653 
1654   // Finally, call the native method
1655   __ call(method->native_function());
1656 
1657   // Set FPSCR/FPCR to a known state
1658   if (AlwaysRestoreFPU) {
1659     __ restore_default_fp_mode();
1660   }
1661 
1662   // Do a safepoint check while thread is in transition state
1663   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1664   Label call_safepoint_runtime, return_to_java;
1665   __ mov(Rtemp, _thread_in_native_trans);
1666   __ ldr_literal(R2, safepoint_state);
1667   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1668 
1669   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1670   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1671 
1672   __ ldr_s32(R2, Address(R2));


1692 
1693     if(UseBiasedLocking) {
1694       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1695       // disp_hdr may not have been saved on entry with biased locking
1696       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1697     }
1698 
1699     // See C1_MacroAssembler::unlock_object() for more comments
1700     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1701     __ cbz(R2, unlock_done);
1702 
1703     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1704 
1705     __ bind(unlock_done);
1706   }
1707 
1708   // Set last java frame and handle block to zero
1709   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1710   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1711 
1712 #ifdef AARCH64
1713   __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1714   if (CheckJNICalls) {
1715     __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1716   }
1717 
1718 
1719   switch (ret_type) {
1720   case T_BOOLEAN:
1721     __ tst(R0, 0xff);
1722     __ cset(R0, ne);
1723     break;
1724   case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1725   case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1726   case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1727   case T_INT    : // fall through
1728   case T_LONG   : // fall through
1729   case T_VOID   : // fall through
1730   case T_FLOAT  : // fall through
1731   case T_DOUBLE : /* nothing to do */          break;
1732   case T_OBJECT : // fall through
1733   case T_ARRAY  : break; // See JNIHandles::resolve below
1734   default:
1735     ShouldNotReachHere();
1736   }
1737 #else
1738   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1739   if (CheckJNICalls) {
1740     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1741   }
1742 #endif // AARCH64
1743 
1744   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1745   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1746     __ resolve_jobject(R0,      // value
1747                        Rtemp,   // tmp1
1748                        R1_tmp); // tmp2
1749   }
1750 
1751   // Any exception pending?
1752   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1753   __ mov(SP, FP);
1754 
1755 #ifdef AARCH64
1756   Label except;
1757   __ cbnz(Rtemp, except);
1758   __ raw_pop(FP, LR);
1759   __ ret();
1760 
1761   __ bind(except);
1762   // Pop the frame and forward the exception. Rexception_pc contains return address.
1763   __ raw_pop(FP, Rexception_pc);
1764 #else
1765   __ cmp(Rtemp, 0);
1766   // Pop the frame and return if no exception pending
1767   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1768   // Pop the frame and forward the exception. Rexception_pc contains return address.
1769   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1770   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1771 #endif // AARCH64
1772   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1773 
1774   // Safepoint operation and/or pending suspend request is in progress.
1775   // Save the return values and call the runtime function by hand.
1776   __ bind(call_safepoint_runtime);
1777   push_result_registers(masm, ret_type);
1778   __ mov(R0, Rthread);
1779   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1780   pop_result_registers(masm, ret_type);
1781   __ b(return_to_java);
1782 
1783   __ bind_literal(safepoint_state);
1784 
1785   // Reguard stack pages. Save native results around a call to C runtime.
1786   __ bind(reguard);
1787   push_result_registers(masm, ret_type);
1788   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1789   pop_result_registers(masm, ret_type);
1790   __ b(reguard_done);
1791 


1831 
1832     __ b(unlock_done);
1833   }
1834 
1835   __ flush();
1836   return nmethod::new_native_nmethod(method,
1837                                      compile_id,
1838                                      masm->code(),
1839                                      vep_offset,
1840                                      frame_complete,
1841                                      stack_slots / VMRegImpl::slots_per_word,
1842                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1843                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1844                                      oop_maps);
1845 }
1846 
1847 // This function returns the adjustment (in number of words) to a c2i adapter
1848 // activation frame, for use during deoptimization.
1849 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1850   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1851 #ifdef AARCH64
1852   extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1853 #endif // AARCH64
1854   return extra_locals_size;
1855 }
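     // Worked example (illustrative numbers): with callee_parameters == 2 and
     // callee_locals == 5 the callee needs room for 3 extra locals, i.e.
     // 3 * Interpreter::stackElementWords words; on AArch64 this count is then
     // rounded up so the adjusted frame keeps the 16-byte stack alignment.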
1856 
1857 
1858 uint SharedRuntime::out_preserve_stack_slots() {
1859   return 0;
1860 }
1861 
1862 
1863 //------------------------------generate_deopt_blob----------------------------
1864 void SharedRuntime::generate_deopt_blob() {
1865   ResourceMark rm;
1866 #ifdef AARCH64
1867   CodeBuffer buffer("deopt_blob", 1024+256, 1);
1868 #else
1869   CodeBuffer buffer("deopt_blob", 1024, 1024);
1870 #endif
1871   int frame_size_in_words;
1872   OopMapSet* oop_maps;
1873   int reexecute_offset;
1874   int exception_in_tls_offset;
1875   int exception_offset;
1876 
1877   MacroAssembler* masm = new MacroAssembler(&buffer);
1878   Label cont;
1879   const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1880   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1881   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1882   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1883 
1884   address start = __ pc();
1885 
1886   oop_maps = new OopMapSet();
1887   // LR saved by caller (can be live in c2 method)
1888 
1889   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1890   // not possible to call the deopt blob from the nmethod and pass the
1891   // address of the deopt handler of the nmethod in LR. What happens
1892   // now is that the caller of the deopt blob pushes the current
1893   // address so the deopt blob doesn't have to do it. This way LR can
1894   // be preserved, contains the live value from the nmethod and is
1895   // saved at R14/R30_offset here.
1896   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1897   __ mov(Rkind, Deoptimization::Unpack_deopt);
1898   __ b(cont);
1899 
1900   exception_offset = __ pc() - start;
1901 


1951   // assert that exception_pc is zero in tls
1952   { Label L;
1953     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1954     __ cbz(Rexception_pc, L);
1955     __ stop("exception pc should be null");
1956     __ bind(L);
1957   }
1958 #endif
1959   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1960   __ verify_oop(Rexception_obj);
1961   {
1962     const Register Rzero = __ zero_register(Rtemp);
1963     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1964   }
1965 
1966   __ bind(noException);
1967 
1968   // This frame is going away.  Fetch return value, so we can move it to
1969   // a new frame.
1970   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1971 #ifndef AARCH64
1972   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1973 #endif // !AARCH64
1974 #ifndef __SOFTFP__
1975   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1976 #endif
1977   // pop frame
1978   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1979 
1980   // Set initial stack state before pushing interpreter frames
1981   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1982   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1983   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1984 
1985 #ifdef AARCH64
1986   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1987   // They are needed for correct stack walking during stack overflow handling.
1988   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1989   __ sub(Rtemp, Rtemp, 2*wordSize);
1990   __ add(SP, SP, Rtemp, ex_uxtx);
1991   __ raw_pop(FP, LR);
1992 
1993 #ifdef ASSERT
1994   { Label L;
1995     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1996     __ cmp(FP, Rtemp);
1997     __ b(L, eq);
1998     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
1999     __ bind(L);
2000   }
2001   { Label L;
2002     __ ldr(Rtemp, Address(R2));
2003     __ cmp(LR, Rtemp);
2004     __ b(L, eq);
2005     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2006     __ bind(L);
2007   }
2008 #endif // ASSERT
2009 
2010 #else
2011   __ add(SP, SP, Rtemp);
2012 #endif // AARCH64
2013 
2014 #ifdef ASSERT
2015   // Compilers generate code that bangs the stack by as much as the
2016   // interpreter would need, so this stack banging should never
2017   // trigger a fault. Verify that it does not on non-product builds.
2018   // See if it is enough stack to push deoptimized frames
2019   if (UseStackBanging) {
2020 #ifndef AARCH64
2021     // The compiled method that we are deoptimizing was popped from the stack.
2022     // If the stack bang results in a stack overflow, we don't return to the
2023     // method that is being deoptimized. The stack overflow exception is
2024     // propagated to the caller of the deoptimized method. Need to get the pc
2025     // from the caller in LR and restore FP.
2026     __ ldr(LR, Address(R2, 0));
2027     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2028 #endif // !AARCH64
2029     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2030     __ arm_stack_overflow_check(R8, Rtemp);
2031   }
2032 #endif
2033   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2034 
2035 #ifndef AARCH64
2036   // Pick up the initial fp we should save
2037   // XXX Note: was ldr(FP, Address(FP));
2038 
2039   // The compiler no longer uses FP as a frame pointer for the
2040   // compiled code. It can be used by the allocator in C2 or to
2041   // remember the original SP for JSR292 call sites.
2042 
2043   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
2044   // Deoptimization::fetch_unroll_info computes the right FP value and
2045   // stores it in Rublock.initial_info. This has been activated for ARM.
2046   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2047 #endif // !AARCH64
2048 
2049   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2050   __ mov(Rsender, SP);
2051 #ifdef AARCH64
2052   __ sub(SP, SP, Rtemp, ex_uxtx);
2053 #else
2054   __ sub(SP, SP, Rtemp);
2055 #endif // AARCH64
2056 
2057   // Push interpreter frames in a loop
2058   Label loop;
2059   __ bind(loop);
2060   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2061   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2062 
2063   __ raw_push(FP, LR);                                     // create new frame
2064   __ mov(FP, SP);
2065   __ sub(Rtemp, Rtemp, 2*wordSize);
2066 
2067 #ifdef AARCH64
2068   __ sub(SP, SP, Rtemp, ex_uxtx);
2069 #else
2070   __ sub(SP, SP, Rtemp);
2071 #endif // AARCH64
2072 
2073   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2074 #ifdef AARCH64
2075   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2076 #else
2077   __ mov(LR, 0);
2078   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2079 #endif // AARCH64
2080 
2081   __ subs(R8, R8, 1);                               // decrement counter
2082   __ mov(Rsender, SP);
2083   __ b(loop, ne);
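  // Illustration only (simplified model, not HotSpot code): the loop above
  // walks the unroll block's frame_pcs[]/frame_sizes[] arrays and carves one
  // skeletal interpreter frame per entry out of the stack, roughly:
  auto sketch_push_frames = [](long sp, const long* frame_sizes,
                               int number_of_frames) -> long {
    for (int k = 0; k < number_of_frames; k++) {
      // raw_push(FP, LR) plus the explicit SP adjustment together consume
      // frame_sizes[k] bytes for this frame
      sp -= frame_sizes[k];
    }
    return sp;   // SP after all skeletal frames have been pushed
  };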
2084 
2085   // Re-push self-frame
2086   __ ldr(LR, Address(R2));
2087   __ raw_push(FP, LR);
2088   __ mov(FP, SP);
2089   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
2090 
2091   // Restore frame locals after moving the frame
2092   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2093 #ifndef AARCH64
2094   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2095 #endif // !AARCH64
2096 
2097 #ifndef __SOFTFP__
2098   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2099 #endif // !__SOFTFP__
2100 
2101 #ifndef AARCH64
2102 #ifdef ASSERT
2103   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2104   { Label L;
2105     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2106     __ cmp_32(Rkind, Rtemp);
2107     __ b(L, eq);
2108     __ stop("Rkind was overwritten");
2109     __ bind(L);
2110   }
2111 #endif
2112 #endif
2113 
2114   // Call unpack_frames with proper arguments
2115   __ mov(R0, Rthread);
2116   __ mov(R1, Rkind);
2117 
2118   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2119   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2120   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2121   if (pc_offset == -1) {
2122     pc_offset = __ offset();
2123   }
2124   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2125   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2126 
2127   // Collect return values, pop self-frame and jump to interpreter
2128   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2129 #ifndef AARCH64
2130   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2131 #endif // !AARCH64
2132   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
2133   // float return value registers are controlled by __ABI_HARD__.
2134   // This matters for vfp-sflt builds.
2135 #ifndef __SOFTFP__
2136   // Interpreter hard float
2137 #ifdef __ABI_HARD__
2138   // Compiler float return value in FP registers
2139   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2140 #else
2141   // Compiler float return value in integer registers,
2142   // copy to D0 for interpreter (S0 <-- R0)
2143   __ fmdrr(D0_tos, R0, R1);
2144 #endif
2145 #endif // !__SOFTFP__
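  // Illustration only: on a vfp-sflt build the compiler returns a double in
  // R0:R1 while the interpreter expects it in D0; fmdrr simply re-packs the
  // two 32-bit halves into one 64-bit FP register. A sketch of that bit move:
  auto sketch_fmdrr = [](unsigned int lo /* R0 */, unsigned int hi /* R1 */) {
    // D0's bit pattern becomes hi:lo; no numeric conversion is performed
    return ((unsigned long long)hi << 32) | lo;
  };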
2146   __ mov(SP, FP);
2147 
2148 #ifdef AARCH64
2149   __ raw_pop(FP, LR);
2150   __ ret();
2151 #else
2152   __ pop(RegisterSet(FP) | RegisterSet(PC));
2153 #endif // AARCH64
2154 
2155   __ flush();
2156 
2157   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2158                                            reexecute_offset, frame_size_in_words);
2159   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2160 }
2161 
2162 #ifdef COMPILER2
2163 
2164 //------------------------------generate_uncommon_trap_blob--------------------
2165 // Ought to generate an ideal graph & compile, but here's some hand-written
2166 // ARM assembly instead.
2167 void SharedRuntime::generate_uncommon_trap_blob() {
2168   // allocate space for the code
2169   ResourceMark rm;
2170 
2171   // setup code generation tools
2172   int pad = VerifyThread ? 512 : 0;
2173 #ifdef _LP64
2174   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2175 #else
2176   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2177   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2178   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2179 #endif
2180   // bypassed when code generation is useless
2181   MacroAssembler* masm               = new MacroAssembler(&buffer);
2182   const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2183   const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2184   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2185 
2186   //
2187   // This is the entry point for all traps the compiler takes when it thinks
2188   // it cannot handle further execution of compiled code. In these cases the
2189   // frame is deoptimized and converted into interpreter frames for
2190   // execution.
2191   // The steps taken by this frame are as follows:
2192   //   - push a fake "unpack_frame"
2193   //   - call the C routine Deoptimization::uncommon_trap (this function
2194   //     packs the current compiled frame into vframe arrays and returns
2195   //     information about the number and size of interpreter frames which
2196   //     are equivalent to the frame which is being deoptimized)
2197   //   - deallocate the "unpack_frame"
2198   //   - deallocate the deoptimization frame
2199   //   - in a loop using the information returned in the previous step
2200   //     push interpreter frames;
2201   //   - create a dummy "unpack_frame"
2202   //   - call the C routine: Deoptimization::unpack_frames (this function
2203   //     lays out values on the interpreter frame which was just created)
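  // Illustration only (hypothetical helper, not HotSpot code): the comment
  // above condenses to the following control-flow sketch, where the
  // function-pointer parameters stand in for the real runtime calls.
  auto sketch_uncommon_trap = [](void* (*call_uncommon_trap)(),          // returns the unroll block
                                 void  (*push_skeletal_frames)(void*),   // one frame per array entry
                                 void  (*call_unpack_frames)()) {
    void* unroll_block = call_uncommon_trap();  // pack the compiled frame into vframe arrays
    push_skeletal_frames(unroll_block);         // replace it with interpreter frames
    call_unpack_frames();                       // lay out values in the new frames
  };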


2219   __ mov(Rublock, R0);
2220   __ reset_last_Java_frame(Rtemp);
2221   __ raw_pop(FP, LR);
2222 
2223 #ifdef ASSERT
2224   { Label L;
2225     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2226     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2227     __ b(L, eq);
2228     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2229     __ bind(L);
2230   }
2231 #endif
2232 
2233 
2234   // Set initial stack state before pushing interpreter frames
2235   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2236   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2237   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2238 
2239 #ifdef AARCH64
2240   // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2241   // They are needed for correct stack walking during stack overflow handling.
2242   // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2243   __ sub(Rtemp, Rtemp, 2*wordSize);
2244   __ add(SP, SP, Rtemp, ex_uxtx);
2245   __ raw_pop(FP, LR);
2246 
2247 #ifdef ASSERT
2248   { Label L;
2249     __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2250     __ cmp(FP, Rtemp);
2251     __ b(L, eq);
2252     __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2253     __ bind(L);
2254   }
2255   { Label L;
2256     __ ldr(Rtemp, Address(R2));
2257     __ cmp(LR, Rtemp);
2258     __ b(L, eq);
2259     __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2260     __ bind(L);
2261   }
2262 #endif // ASSERT
2263 
2264 #else
2265   __ add(SP, SP, Rtemp);
2266 #endif // AARCH64
2267 
2268   // See if there is enough stack to push the deoptimized frames.
2269 #ifdef ASSERT
2270   // Compilers generate code that bangs the stack by as much as the
2271   // interpreter would need, so this stack banging should never
2272   // trigger a fault. Verify that it does not on non-product builds.
2273   if (UseStackBanging) {
2274 #ifndef AARCH64
2275     // The compiled method that we are deoptimizing was popped from the stack.
2276     // If the stack bang results in a stack overflow, we don't return to the
2277     // method that is being deoptimized. The stack overflow exception is
2278     // propagated to the caller of the deoptimized method. Need to get the pc
2279     // from the caller in LR and restore FP.
2280     __ ldr(LR, Address(R2, 0));
2281     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2282 #endif // !AARCH64
2283     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2284     __ arm_stack_overflow_check(R8, Rtemp);
2285   }
2286 #endif
2287   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2288   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2289   __ mov(Rsender, SP);
2290 #ifdef AARCH64
2291   __ sub(SP, SP, Rtemp, ex_uxtx);
2292 #else
2293   __ sub(SP, SP, Rtemp);
2294 #endif
2295 #ifndef AARCH64
2296   //  __ ldr(FP, Address(FP));
2297   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2298 #endif // !AARCH64
2299 
2300   // Push interpreter frames in a loop
2301   Label loop;
2302   __ bind(loop);
2303   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2304   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2305 
2306   __ raw_push(FP, LR);                                     // create new frame
2307   __ mov(FP, SP);
2308   __ sub(Rtemp, Rtemp, 2*wordSize);
2309 
2310 #ifdef AARCH64
2311   __ sub(SP, SP, Rtemp, ex_uxtx);
2312 #else
2313   __ sub(SP, SP, Rtemp);
2314 #endif // AARCH64
2315 
2316   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2317 #ifdef AARCH64
2318   __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2319 #else
2320   __ mov(LR, 0);
2321   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2322 #endif // AARCH64
2323   __ subs(R8, R8, 1);                               // decrement counter
2324   __ mov(Rsender, SP);
2325   __ b(loop, ne);
2326 
2327   // Re-push self-frame
2328   __ ldr(LR, Address(R2));
2329   __ raw_push(FP, LR);
2330   __ mov(FP, SP);
2331 
2332   // Call unpack_frames with proper arguments
2333   __ mov(R0, Rthread);
2334   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2335   __ set_last_Java_frame(SP, FP, false, Rtemp);
2336   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2337   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2338   __ reset_last_Java_frame(Rtemp);
2339 
2340   __ mov(SP, FP);
2341 #ifdef AARCH64
2342   __ raw_pop(FP, LR);
2343   __ ret();
2344 #else
2345   __ pop(RegisterSet(FP) | RegisterSet(PC));
2346 #endif
2347 
2348   masm->flush();
2349   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2350 }
2351 
2352 #endif // COMPILER2
2353 
2354 //------------------------------generate_handler_blob------
2355 //
2356 // Generate a special Compile2Runtime blob that saves all registers,
2357 // sets up an oopmap, and calls safepoint code to stop the compiled code for
2358 // a safepoint.
2359 //
2360 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2361   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2362 
2363   ResourceMark rm;
2364   CodeBuffer buffer("handler_blob", 256, 256);
2365   int frame_size_words;
2366   OopMapSet* oop_maps;
2367 
2368   bool cause_return = (poll_type == POLL_AT_RETURN);
2369 
2370   MacroAssembler* masm = new MacroAssembler(&buffer);
2371   address start = __ pc();
2372   oop_maps = new OopMapSet();
2373 
2374   if (!cause_return) {
2375 #ifdef AARCH64
2376     __ raw_push(LR, LR);
2377 #else
2378     __ sub(SP, SP, 4); // make room for LR which may still be live
2379                        // here if we are coming from a c2 method
2380 #endif // AARCH64
2381   }
2382 
2383   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2384   if (!cause_return) {
2385     // update the saved PC with the correct value;
2386     // two steps are needed because LR can be live in a c2 method
2387     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2388     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2389   }
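  // Illustration only: for a poll that is not at a return, the return-address
  // slot written by save_live_registers still holds whatever LR contained, so
  // it is patched here with the PC that was stashed in the thread when the
  // safepoint poll trapped. A minimal sketch of that fixup:
  auto sketch_fix_saved_pc = [](long* lr_slot_in_save_area,
                                long saved_exception_pc) {
    // load the real PC from the thread, then store it over the saved slot
    *lr_slot_in_save_area = saved_exception_pc;
  };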
2390 
2391   __ mov(R0, Rthread);
2392   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2393   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2394   __ call(call_ptr);
2395   if (pc_offset == -1) {
2396     pc_offset = __ offset();
2397   }
2398   oop_maps->add_gc_map(pc_offset, map);
2399   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2400 
2401   // Check for pending exception
2402   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2403   __ cmp(Rtemp, 0);
2404 
2405 #ifdef AARCH64
2406   RegisterSaver::restore_live_registers(masm, cause_return);
2407   Register ret_addr = cause_return ? LR : Rtemp;
2408   if (!cause_return) {
2409     __ raw_pop(FP, ret_addr);
2410   }
2411 
2412   Label throw_exception;
2413   __ b(throw_exception, ne);
2414   __ br(ret_addr);
2415 
2416   __ bind(throw_exception);
2417   __ mov(Rexception_pc, ret_addr);
2418 #else // AARCH64
2419   if (!cause_return) {
2420     RegisterSaver::restore_live_registers(masm, false);
2421     __ pop(PC, eq);
2422     __ pop(Rexception_pc);
2423   } else {
2424     RegisterSaver::restore_live_registers(masm);
2425     __ bx(LR, eq);
2426     __ mov(Rexception_pc, LR);
2427   }
2428 #endif // AARCH64
2429 
2430   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2431 
2432   __ flush();
2433 
2434   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2435 }
2436 
2437 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2438   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2439 
2440   ResourceMark rm;
2441   CodeBuffer buffer(name, 1000, 512);
2442   int frame_size_words;
2443   OopMapSet *oop_maps;
2444   int frame_complete;
2445 
2446   MacroAssembler* masm = new MacroAssembler(&buffer);
2447   Label pending_exception;
2448 




  45 
  46 #define __ masm->
  47 
  48 class RegisterSaver {
  49 public:
  50 
  51   // Special registers:
  52   //              32-bit ARM     64-bit ARM
  53   //  Rthread:       R10            R28
  54   //  LR:            R14            R30
  55 
  56   // Rthread is callee saved in the C ABI and never changed by compiled code:
  57   // no need to save it.
  58 
  59   // 2 slots for LR: the one at LR_offset and an other one at R14/R30_offset.
  60   // The one at LR_offset is a return address that is needed by stack walking.
  61   // A c2 method uses LR as a standard register so it may be live when we
  62   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  63   // in case it's live in the method we are coming from.
  64 








































  65 
  66   enum RegisterLayout {
  67     fpu_save_size = FloatRegisterImpl::number_of_registers,
  68 #ifndef __SOFTFP__
  69     D0_offset = 0,
  70 #endif
  71     R0_offset = fpu_save_size,
  72     R1_offset,
  73     R2_offset,
  74     R3_offset,
  75     R4_offset,
  76     R5_offset,
  77     R6_offset,
  78 #if (FP_REG_NUM != 7)
  79     // if not saved as FP
  80     R7_offset,
  81 #endif
  82     R8_offset,
  83     R9_offset,
  84 #if (FP_REG_NUM != 11)
  85     // if not saved as FP
  86     R11_offset,
  87 #endif
  88     R12_offset,
  89     R14_offset,
  90     FP_offset,
  91     LR_offset,
  92     reg_save_size,
  93 
  94     Rmethod_offset = R9_offset,
  95     Rtemp_offset = R12_offset,
  96   };
  97 
  98   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  99   // (altFP_7_11 is the one amoung R7 and R11 which is not FP)
 100 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
 101 

 102 
 103   //  When LR may be live in the nmethod from which we are comming
 104   //  then lr_saved is true, the return address is saved before the
 105   //  call to save_live_register by the caller and LR contains the
 106   //  live value.
 107 
 108   static OopMap* save_live_registers(MacroAssembler* masm,
 109                                      int* total_frame_words,
 110                                      bool lr_saved = false);
 111   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 112 
 113 };
 114 
 115 




 116 
 117 
 118 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 119                                            int* total_frame_words,
 120                                            bool lr_saved) {
 121   *total_frame_words = reg_save_size;
 122 
 123   OopMapSet *oop_maps = new OopMapSet();
 124   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 125 









































 126   if (lr_saved) {
 127     __ push(RegisterSet(FP));
 128   } else {
 129     __ push(RegisterSet(FP) | RegisterSet(LR));
 130   }
 131   __ push(SAVED_BASE_REGS);
 132   if (HaveVFP) {
 133     if (VM_Version::has_vfp3_32()) {
 134       __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
 135     } else {
 136       if (FloatRegisterImpl::number_of_registers > 32) {
 137         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 138         __ sub(SP, SP, 32 * wordSize);
 139       }
 140     }
 141     __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
 142   } else {
 143     __ sub(SP, SP, fpu_save_size * wordSize);
 144   }
 145 


 149     if (j == FP_REG_NUM) {
 150       // skip the FP register, managed below.
 151       j++;
 152     }
 153     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 154     j++;
 155   }
 156   assert(j == R10->encoding(), "must be");
 157 #if (FP_REG_NUM != 11)
 158   // add R11, if not managed as FP
 159   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 160 #endif
 161   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 162   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 163   if (HaveVFP) {
 164     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 165       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 166       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 167     }
 168   }

 169 
 170   return map;
 171 }
 172 
 173 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {


























 174   if (HaveVFP) {
 175     __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
 176     if (VM_Version::has_vfp3_32()) {
 177       __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
 178     } else {
 179       if (FloatRegisterImpl::number_of_registers > 32) {
 180         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 181         __ add(SP, SP, 32 * wordSize);
 182       }
 183     }
 184   } else {
 185     __ add(SP, SP, fpu_save_size * wordSize);
 186   }
 187   __ pop(SAVED_BASE_REGS);
 188   if (restore_lr) {
 189     __ pop(RegisterSet(FP) | RegisterSet(LR));
 190   } else {
 191     __ pop(RegisterSet(FP));
 192   }

















































 193 }
 194 

 195 
 196 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 197 #ifdef __ABI_HARD__
 198   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 199     __ sub(SP, SP, 8);
 200     __ fstd(D0, Address(SP));
 201     return;
 202   }
 203 #endif // __ABI_HARD__
 204   __ raw_push(R0, R1);
 205 }
 206 
 207 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 208 #ifdef __ABI_HARD__
 209   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 210     __ fldd(D0, Address(SP));
 211     __ add(SP, SP, 8);
 212     return;
 213   }
 214 #endif // __ABI_HARD__


 225   // but there is no way to guarantee that
 226   if (fp_regs_in_arguments) {
 227     // convert fp_regs_in_arguments to a number of double registers
 228     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 229     __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 230   }
 231 #endif // __ABI_HARD__
 232 }
 233 
 234 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 235 #ifdef __ABI_HARD__
 236   if (fp_regs_in_arguments) {
 237     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 238     __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
 239   }
 240 #endif // __ABI_HARD__
 241 
 242   __ pop(RegisterSet(R0, R3));
 243 }
 244 

 245 
 246 
 247 // Is the vector's size (in bytes) bigger than the size saved by default?
 248 // All vector registers are saved by default on ARM.
 249 bool SharedRuntime::is_wide_vector(int size) {
 250   return false;
 251 }
 252 
 253 size_t SharedRuntime::trampoline_size() {
 254   return 16;
 255 }
 256 
 257 void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
 258   InlinedAddress dest(destination);
 259   __ indirect_jump(dest, Rtemp);
 260   __ bind_literal(dest);
 261 }
 262 
 263 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 264                                         VMRegPair *regs,
 265                                         VMRegPair *regs2,
 266                                         int total_args_passed) {
 267   assert(regs2 == NULL, "not needed on arm");



































































 268 
 269   int slot = 0;
 270   int ireg = 0;
 271 #ifdef __ABI_HARD__
 272   int fp_slot = 0;
 273   int single_fpr_slot = 0;
 274 #endif // __ABI_HARD__
 275   for (int i = 0; i < total_args_passed; i++) {
 276     switch (sig_bt[i]) {
 277     case T_SHORT:
 278     case T_CHAR:
 279     case T_BYTE:
 280     case T_BOOLEAN:
 281     case T_INT:
 282     case T_ARRAY:
 283     case T_OBJECT:
 284     case T_ADDRESS:
 285     case T_METADATA:
 286 #ifndef __ABI_HARD__
 287     case T_FLOAT:


 344     case T_DOUBLE:
 345       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 346       if (fp_slot <= 14) {
 347         FloatRegister r1 = as_FloatRegister(fp_slot);
 348         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 349         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 350         fp_slot += 2;
 351       } else {
 352         if(slot & 1) slot++;
 353         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 354         slot += 2;
 355         single_fpr_slot = 16;
 356       }
 357       break;
 358 #endif // __ABI_HARD__
 359     default:
 360       ShouldNotReachHere();
 361     }
 362   }
 363   return slot;

 364 }
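// Illustration only (simplified standalone sketch, not the code above): for
// 32-bit integer-class arguments the C calling convention assigns R0..R3
// first and then stack slots. The sketch models just that subset; longs,
// doubles and the hard-float VFP registers follow the same pattern with
// additional pairing and alignment rules.
static int sketch_c_int_args(int total_int_args,
                             int* out_locations /* >= 0: register number, < 0: -(slot+1) */) {
  int slot = 0;
  int ireg = 0;
  for (int i = 0; i < total_int_args; i++) {
    if (ireg < 4) {
      out_locations[i] = ireg++;        // passed in R0..R3
    } else {
      out_locations[i] = -(slot + 1);   // passed on the stack
      slot++;
    }
  }
  return slot;                          // number of stack slots used
}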
 365 
 366 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 367                                            VMRegPair *regs,
 368                                            int total_args_passed,
 369                                            int is_outgoing) {




 370 #ifdef __SOFTFP__
 371   // soft float is the same as the C calling convention.
 372   return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
 373 #endif // __SOFTFP__
 374   (void) is_outgoing;
 375   int slot = 0;
 376   int ireg = 0;
 377   int freg = 0;
 378   int single_fpr = 0;
 379 
 380   for (int i = 0; i < total_args_passed; i++) {
 381     switch (sig_bt[i]) {
 382     case T_SHORT:
 383     case T_CHAR:
 384     case T_BYTE:
 385     case T_BOOLEAN:
 386     case T_INT:
 387     case T_ARRAY:
 388     case T_OBJECT:
 389     case T_ADDRESS:


 432         Register r2 = as_Register(ireg + 1);
 433         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 434         ireg += 2;
 435       } else {
 436         if (slot & 1) slot++;
 437         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 438         slot += 2;
 439         ireg = 4;
 440       }
 441       break;
 442     case T_VOID:
 443       regs[i].set_bad();
 444       break;
 445     default:
 446       ShouldNotReachHere();
 447     }
 448   }
 449 
 450   if (slot & 1) slot++;
 451   return slot;

 452 }
 453 
 454 static void patch_callers_callsite(MacroAssembler *masm) {
 455   Label skip;
 456 
 457   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 458   __ cbz(Rtemp, skip);
 459 




 460   // Pushing an even number of registers for stack alignment.
 461   // Selecting R9, which had to be saved anyway for some platforms.
 462   __ push(RegisterSet(R0, R3) | R9 | LR);

 463 
 464   __ mov(R0, Rmethod);
 465   __ mov(R1, LR);
 466   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 467 




 468   __ pop(RegisterSet(R0, R3) | R9 | LR);

 469 
 470   __ bind(skip);
 471 }
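// Illustration only: the push in patch_callers_callsite deliberately saves an
// even number of 32-bit registers (R0-R3, R9, LR = 6 registers, 24 bytes) so
// that SP stays 8-byte aligned across the C call, as the AAPCS requires.
static bool sketch_push_keeps_sp_aligned(int saved_registers, int word_size = 4) {
  return (saved_registers * word_size) % 8 == 0;   // 6 * 4 = 24 -> aligned
}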
 472 
 473 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 474                                     int total_args_passed, int comp_args_on_stack,
 475                                     const BasicType *sig_bt, const VMRegPair *regs) {
 476   // TODO: ARM - maybe we can use ldm to load the arguments
 477   const Register tmp = Rtemp; // avoid erasing R5_mh
 478 
 479   // The next assert may not be needed, but it is safer. Extra analysis is required
 480   // if there are not enough free registers and we need to use R5 here.
 481   assert_different_registers(tmp, R5_mh);
 482 
 483   // 6243940 We might end up in handle_wrong_method if
 484   // the callee is deoptimized as we race through here. If that
 485   // happens we don't want to take a safepoint because the
 486   // caller frame will look interpreted and arguments are now
 487   // "compiled" so it is much better to make this transition
 488   // invisible to the stack walking code. Unfortunately if
 489   // we try to find the callee by normal means a safepoint
 490   // is possible. So we stash the desired callee in the thread
 491   // and the VM will find it there should this case occur.
 492   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 493   __ str(Rmethod, callee_target_addr);
 494 



















































 495 
 496   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 497 
 498   const Register initial_sp = Rmethod; // temporarily scratched
 499 
 500   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 501   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 502 
 503   __ mov(initial_sp, SP);
 504 
 505   if (comp_args_on_stack) {
 506     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 507   }
 508   __ bic(SP, SP, StackAlignmentInBytes - 1);
 509 
 510   for (int i = 0; i < total_args_passed; i++) {
 511     if (sig_bt[i] == T_VOID) {
 512       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 513       continue;
 514     }


 536         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 537       }
 538     } else if (r_1->is_FloatRegister()) {
 539 #ifdef __SOFTFP__
 540       ShouldNotReachHere();
 541 #endif // __SOFTFP__
 542       if (!r_2->is_valid()) {
 543         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 544       } else {
 545         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 546       }
 547     } else {
 548       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 549     }
 550   }
 551 
 552   // restore Rmethod (scratched for initial_sp)
 553   __ ldr(Rmethod, callee_target_addr);
 554   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 555 

 556 }
 557 
 558 static void gen_c2i_adapter(MacroAssembler *masm,
 559                             int total_args_passed,  int comp_args_on_stack,
 560                             const BasicType *sig_bt, const VMRegPair *regs,
 561                             Label& skip_fixup) {
 562   // TODO: ARM - maybe we can use stm to deoptimize the arguments
 563   const Register tmp = Rtemp;
 564 
 565   patch_callers_callsite(masm);
 566   __ bind(skip_fixup);
 567 
 568   __ mov(Rsender_sp, SP); // not yet saved
 569 


















































 570 
 571   int extraspace = total_args_passed * Interpreter::stackElementSize;
 572   if (extraspace) {
 573     __ sub_slow(SP, SP, extraspace);
 574   }
 575 
 576   for (int i = 0; i < total_args_passed; i++) {
 577     if (sig_bt[i] == T_VOID) {
 578       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 579       continue;
 580     }
 581     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 582 
 583     VMReg r_1 = regs[i].first();
 584     VMReg r_2 = regs[i].second();
 585     if (r_1->is_stack()) {
 586       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 587       if (!r_2->is_valid()) {
 588         __ ldr(tmp, Address(SP, arg_offset));
 589         __ str(tmp, Address(SP, stack_offset));


 599       } else {
 600         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 601         __ str(r_2->as_Register(), Address(SP, stack_offset));
 602       }
 603     } else if (r_1->is_FloatRegister()) {
 604 #ifdef __SOFTFP__
 605       ShouldNotReachHere();
 606 #endif // __SOFTFP__
 607       if (!r_2->is_valid()) {
 608         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 609       } else {
 610         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 611       }
 612     } else {
 613       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 614     }
 615   }
 616 
 617   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 618 

 619 }
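// Illustration only (standalone sketch): gen_c2i_adapter above stores argument
// i at (total_args_passed - 1 - i) * Interpreter::stackElementSize from the
// new SP, after reserving extraspace = total_args_passed * stackElementSize.
static int sketch_c2i_stack_offset(int total_args_passed, int i,
                                   int stack_element_size /* e.g. 4 on 32-bit ARM */) {
  return (total_args_passed - 1 - i) * stack_element_size;
}
// e.g. with 3 args and 4-byte elements: arg 0 -> offset 8, arg 1 -> 4, arg 2 -> 0.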
 620 
 621 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 622                                                             int total_args_passed,
 623                                                             int comp_args_on_stack,
 624                                                             const BasicType *sig_bt,
 625                                                             const VMRegPair *regs,
 626                                                             AdapterFingerPrint* fingerprint) {
 627   address i2c_entry = __ pc();
 628   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 629 
 630   address c2i_unverified_entry = __ pc();
 631   Label skip_fixup;
 632   const Register receiver       = R0;
 633   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 634   const Register receiver_klass = R4;
 635 
 636   __ load_klass(receiver_klass, receiver);
 637   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 638   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 639   __ cmp(receiver_klass, holder_klass);
 640 








 641   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 642   __ cmp(Rtemp, 0, eq);
 643   __ b(skip_fixup, eq);
 644   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);

 645 
 646   address c2i_entry = __ pc();
 647   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 648 
 649   __ flush();
 650   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 651 }
 652 
 653 
 654 static int reg2offset_in(VMReg r) {
 655   // Account for saved FP and LR
 656   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
 657 }
 658 
 659 static int reg2offset_out(VMReg r) {
 660   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 661 }
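// Illustration only: a worked example of the two helpers above, assuming the
// 32-bit ARM values VMRegImpl::stack_slot_size == 4 and wordSize == 4.
static int sketch_reg2offset_in(int reg2stack_index) {
  // incoming args are addressed off FP, past the saved FP and LR (2 words)
  return reg2stack_index * 4 + 2 * 4;   // slot 0 -> FP + 8, slot 2 -> FP + 16
}
static int sketch_reg2offset_out(int reg2stack_index) {
  // outgoing args are addressed off SP; out_preserve_stack_slots() is 0 here
  return reg2stack_index * 4;           // slot 0 -> SP + 0
}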
 662 
 663 
 664 static void verify_oop_args(MacroAssembler* masm,


 825   int lock_slot_fp_offset = stack_size - 2 * wordSize -
 826     lock_slot_offset * VMRegImpl::stack_slot_size;
 827 
 828   // Unverified entry point
 829   address start = __ pc();
 830 
 831   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
 832   const Register receiver = R0; // see receiverOpr()
 833   __ load_klass(Rtemp, receiver);
 834   __ cmp(Rtemp, Ricklass);
 835   Label verified;
 836 
 837   __ b(verified, eq); // jump over alignment no-ops too
 838   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 839   __ align(CodeEntryAlignment);
 840 
 841   // Verified entry point
 842   __ bind(verified);
 843   int vep_offset = __ pc() - start;
 844 




 845 
 846   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
 847     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
 848     // instead of doing a full VM transition once it's been computed.
 849     Label slow_case;
 850     const Register obj_reg = R0;
 851 
 852     // Unlike Object.hashCode, System.identityHashCode is a static method and
 853     // gets the object as an argument instead of as the receiver.
 854     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
 855       assert(method->is_static(), "method should be static");
 856       // return 0 for null reference input, return val = R0 = obj_reg = 0






 857       __ cmp(obj_reg, 0);
 858       __ bx(LR, eq);

 859     }
 860 
 861     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 862 
 863     assert(markOopDesc::unlocked_value == 1, "adjust this code");
 864     __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
 865 
 866     if (UseBiasedLocking) {
 867       assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
 868       __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
 869     }
 870 






 871     __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
 872     __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
 873     __ bx(LR, ne);

 874 
 875     __ bind(slow_case);
 876   }
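  // Illustration only (the bit positions are passed in rather than assumed):
  // the fast path above returns the cached identity hash when the object is
  // unlocked, not biased, and a hash has already been installed. In spirit:
  auto sketch_fast_hash = [](unsigned long mark,
                             unsigned long unlocked_bit,
                             unsigned long biased_bit,
                             unsigned long hash_mask_in_place,
                             unsigned int  hash_shift) -> long {
    if ((mark & unlocked_bit) == 0) return -1;   // locked: take the slow case
    if ((mark & biased_bit) != 0)   return -1;   // biased: take the slow case
    unsigned long hash = (mark & hash_mask_in_place) >> hash_shift;
    return hash != 0 ? (long)hash : -1;          // 0 means no hash installed yet
  };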
 877 
 878   // Bang stack pages
 879   __ arm_stack_overflow_check(stack_size, Rtemp);
 880 
 881   // Setup frame linkage
 882   __ raw_push(FP, LR);
 883   __ mov(FP, SP);
 884   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 885 
 886   int frame_complete = __ pc() - start;
 887 
 888   OopMapSet* oop_maps = new OopMapSet();
 889   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 890   const int extra_args = is_static ? 2 : 1;
 891   int receiver_offset = -1;
 892   int fp_regs_in_arguments = 0;
 893 
 894   for (i = total_in_args; --i >= 0; ) {
 895     switch (in_sig_bt[i]) {
 896     case T_ARRAY:
 897     case T_OBJECT: {
 898       VMReg src = in_regs[i].first();
 899       VMReg dst = out_regs[i + extra_args].first();
 900       if (src->is_stack()) {
 901         assert(dst->is_stack(), "must be");
 902         assert(i != 0, "Incoming receiver is always in a register");
 903         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 904         __ cmp(Rtemp, 0);




 905         __ add(Rtemp, FP, reg2offset_in(src), ne);

 906         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 907         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 908         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
 909       } else {
 910         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
 911         __ str(src->as_Register(), Address(SP, offset));
 912         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
 913         if ((i == 0) && (!is_static)) {
 914           receiver_offset = offset;
 915         }
 916         oop_handle_offset += VMRegImpl::slots_per_word;
 917 








 918         if (dst->is_stack()) {
 919           __ movs(Rtemp, src->as_Register());
 920           __ add(Rtemp, SP, offset, ne);
 921           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 922         } else {
 923           __ movs(dst->as_Register(), src->as_Register());
 924           __ add(dst->as_Register(), SP, offset, ne);
 925         }

 926       }
 927     }
 928 
 929     case T_VOID:
 930       break;
 931 

















 932 
 933 #ifdef __SOFTFP__
 934     case T_DOUBLE:
 935 #endif
 936     case T_LONG: {
 937       VMReg src_1 = in_regs[i].first();
 938       VMReg src_2 = in_regs[i].second();
 939       VMReg dst_1 = out_regs[i + extra_args].first();
 940       VMReg dst_2 = out_regs[i + extra_args].second();
 941 #if (ALIGN_WIDE_ARGUMENTS == 0)
 942       // C convention can mix a register and a stack slot for a
 943   // 64-bit native argument.
 944 
 945       // Note: following code should work independently of whether
 946       // the Java calling convention follows C convention or whether
 947       // it aligns 64-bit values.
 948       if (dst_2->is_Register()) {
 949         if (src_1->as_Register() != dst_1->as_Register()) {
 950           assert(src_1->as_Register() != dst_2->as_Register() &&
 951                  src_2->as_Register() != dst_2->as_Register(), "must be");


1084       if (src_1->is_stack()) {
1085         if (dst_1->is_stack()) {
1086           assert(dst_2->is_stack(), "must be");
1087           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1088           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1089           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1090           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1091         } else {
1092           // C2 Java calling convention does not populate S14 and S15, therefore
1093           // those need to be loaded from stack here
1094           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1095           fp_regs_in_arguments += 2;
1096         }
1097       } else {
1098         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1099         fp_regs_in_arguments += 2;
1100       }
1101       break;
1102     }
1103 #endif // __ABI_HARD__

1104 
1105     default: {
1106       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1107       VMReg src = in_regs[i].first();
1108       VMReg dst = out_regs[i + extra_args].first();
1109       if (src->is_stack()) {
1110         assert(dst->is_stack(), "must be");
1111         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1112         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1113       } else if (dst->is_stack()) {
1114         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1115       } else {
1116         assert(src->is_Register() && dst->is_Register(), "must be");
1117         __ mov(dst->as_Register(), src->as_Register());
1118       }
1119     }
1120     }
1121   }
1122 
1123   // Get Klass mirror
1124   int klass_offset = -1;
1125   if (is_static) {
1126     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1127     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1128     __ add(c_rarg1, SP, klass_offset);
1129     __ str(Rtemp, Address(SP, klass_offset));
1130     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1131   }
1132 
1133   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1134   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1135   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1136   oop_maps->add_gc_map(pc_offset, map);
1137 

1138   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1139   __ membar(MacroAssembler::StoreStore, Rtemp);

1140 
1141   // RedefineClasses() tracing support for obsolete method entry
1142   if (log_is_enabled(Trace, redefine, class, obsolete)) {



1143     __ save_caller_save_registers();
1144     __ mov(R0, Rthread);
1145     __ mov_metadata(R1, method());
1146     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1147     __ restore_caller_save_registers();
1148   }
1149 
1150   const Register sync_handle = R5;
1151   const Register sync_obj    = R6;
1152   const Register disp_hdr    = altFP_7_11;
1153   const Register tmp         = R8;
1154 
1155   Label slow_lock, slow_lock_biased, lock_done, fast_lock;
1156   if (method->is_synchronized()) {
1157     // The first argument is a handle to sync object (a class or an instance)
1158     __ ldr(sync_obj, Address(R1));
1159     // Remember the handle for the unlocking code
1160     __ mov(sync_handle, R1);
1161 
1162     if(UseBiasedLocking) {
1163       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1164     }
1165 
1166     const Register mark = tmp;





























1167     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1168   // That would be acceptable, as either the CAS or the slow-case path is taken in that case.
1169 
1170     __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1171     __ sub(disp_hdr, FP, lock_slot_fp_offset);
1172     __ tst(mark, markOopDesc::unlocked_value);
1173     __ b(fast_lock, ne);
1174 
1175     // Check for recursive lock
1176     // See comments in InterpreterMacroAssembler::lock_object for
1177     // explanations on the fast recursive locking check.
1178     // Check independently the low bits and the distance to SP
1179     // -1- test low 2 bits
1180     __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1181     // -2- test (hdr - SP) if the low two bits are 0
1182     __ sub(Rtemp, mark, SP, eq);
1183     __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1184     // If still 'eq' then recursive locking OK: set displaced header to 0
1185     __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1186     __ b(lock_done, eq);
1187     __ b(slow_lock);
1188 
1189     __ bind(fast_lock);
1190     __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1191 
1192     __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);

1193 
1194     __ bind(lock_done);
1195   }
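  // Illustration only: the recursive-lock test above checks, in one step,
  // that the displaced mark is a plain pointer (low two bits clear) and that
  // it points just above SP, i.e. into the current thread's stack. A sketch:
  auto sketch_is_recursive_stack_lock = [](unsigned long mark,
                                           unsigned long sp,
                                           unsigned long page_size) -> bool {
    if ((mark & 3) != 0) return false;   // not a stack-lock pointer
    return (mark - sp) < page_size;      // the header lies within a page of SP
  };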
1196 
1197   // Get JNIEnv*
1198   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1199 
1200   // Perform thread state transition
1201   __ mov(Rtemp, _thread_in_native);





1202   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));

1203 
1204   // Finally, call the native method
1205   __ call(method->native_function());
1206 
1207   // Set FPSCR/FPCR to a known state
1208   if (AlwaysRestoreFPU) {
1209     __ restore_default_fp_mode();
1210   }
1211 
1212   // Do a safepoint check while thread is in transition state
1213   InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1214   Label call_safepoint_runtime, return_to_java;
1215   __ mov(Rtemp, _thread_in_native_trans);
1216   __ ldr_literal(R2, safepoint_state);
1217   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1218 
1219   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1220   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1221 
1222   __ ldr_s32(R2, Address(R2));
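  // Illustration only (simplified model): the sequence above publishes the new
  // thread state before polling the safepoint state, which is why the
  // StoreLoad barrier sits between the store and the load. Roughly:
  auto sketch_native_trans_poll = [](int* thread_state,
                                     const int* safepoint_state,
                                     int in_native_trans,
                                     int not_synchronized,
                                     void (*storeload_fence)(),
                                     void (*call_runtime)()) {
    *thread_state = in_native_trans;       // the str_32 to thread_state_offset()
    storeload_fence();                     // the membar above
    if (*safepoint_state != not_synchronized) {
      call_runtime();                      // check_special_condition_for_native_trans
    }
  };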


1242 
1243     if(UseBiasedLocking) {
1244       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1245       // disp_hdr may not have been saved on entry with biased locking
1246       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1247     }
1248 
1249     // See C1_MacroAssembler::unlock_object() for more comments
1250     __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1251     __ cbz(R2, unlock_done);
1252 
1253     __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1254 
1255     __ bind(unlock_done);
1256   }
1257 
1258   // Set last java frame and handle block to zero
1259   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1260   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1261 


























1262   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1263   if (CheckJNICalls) {
1264     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1265   }

1266 
1267   // Unbox the oop result, i.e. JNIHandles::resolve the value in R0.
1268   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1269     __ resolve_jobject(R0,      // value
1270                        Rtemp,   // tmp1
1271                        R1_tmp); // tmp2
1272   }
1273 
1274   // Any exception pending?
1275   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1276   __ mov(SP, FP);
1277 










1278   __ cmp(Rtemp, 0);
1279   // Pop the frame and return if no exception pending
1280   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1281   // Pop the frame and forward the exception. Rexception_pc contains return address.
1282   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1283   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);

1284   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1285 
1286   // Safepoint operation and/or pending suspend request is in progress.
1287   // Save the return values and call the runtime function by hand.
1288   __ bind(call_safepoint_runtime);
1289   push_result_registers(masm, ret_type);
1290   __ mov(R0, Rthread);
1291   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1292   pop_result_registers(masm, ret_type);
1293   __ b(return_to_java);
1294 
1295   __ bind_literal(safepoint_state);
1296 
1297   // Reguard stack pages. Save native results around a call to C runtime.
1298   __ bind(reguard);
1299   push_result_registers(masm, ret_type);
1300   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1301   pop_result_registers(masm, ret_type);
1302   __ b(reguard_done);
1303 


1343 
1344     __ b(unlock_done);
1345   }
1346 
1347   __ flush();
1348   return nmethod::new_native_nmethod(method,
1349                                      compile_id,
1350                                      masm->code(),
1351                                      vep_offset,
1352                                      frame_complete,
1353                                      stack_slots / VMRegImpl::slots_per_word,
1354                                      in_ByteSize(is_static ? klass_offset : receiver_offset),
1355                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1356                                      oop_maps);
1357 }
1358 
1359 // This function returns the adjustment (in number of words) to a c2i adapter
1360 // activation, for use during deoptimization.
1361 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1362   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;



1363   return extra_locals_size;
1364 }
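// Illustration only: a worked example of last_frame_adjust(), assuming
// Interpreter::stackElementWords == 1 (one word per expression-stack slot).
// With 2 callee parameters and 5 callee locals the c2i activation grows by
// (5 - 2) * 1 = 3 words.
static int sketch_last_frame_adjust(int callee_parameters, int callee_locals,
                                    int stack_element_words = 1) {
  return (callee_locals - callee_parameters) * stack_element_words;
}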
1365 
1366 
1367 uint SharedRuntime::out_preserve_stack_slots() {
1368   return 0;
1369 }
1370 
1371 
1372 //------------------------------generate_deopt_blob----------------------------
1373 void SharedRuntime::generate_deopt_blob() {
1374   ResourceMark rm;



1375   CodeBuffer buffer("deopt_blob", 1024, 1024);

1376   int frame_size_in_words;
1377   OopMapSet* oop_maps;
1378   int reexecute_offset;
1379   int exception_in_tls_offset;
1380   int exception_offset;
1381 
1382   MacroAssembler* masm = new MacroAssembler(&buffer);
1383   Label cont;
1384   const Register Rkind   = R9; // caller-saved
1385   const Register Rublock = R6;
1386   const Register Rsender = altFP_7_11;
1387   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1388 
1389   address start = __ pc();
1390 
1391   oop_maps = new OopMapSet();
1392   // LR saved by caller (can be live in c2 method)
1393 
1394   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1395   // not possible to call the deopt blob from the nmethod and pass the
1396   // address of the deopt handler of the nmethod in LR. What happens
1397   // now is that the caller of the deopt blob pushes the current
1398   // address so the deopt blob doesn't have to do it. This way LR is
1399   // preserved: it contains the live value from the nmethod and is
1400   // saved at R14/R30_offset here.
1401   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1402   __ mov(Rkind, Deoptimization::Unpack_deopt);
1403   __ b(cont);
1404 
1405   exception_offset = __ pc() - start;
1406 


1456   // assert that exception_pc is zero in tls
1457   { Label L;
1458     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1459     __ cbz(Rexception_pc, L);
1460     __ stop("exception pc should be null");
1461     __ bind(L);
1462   }
1463 #endif
1464   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1465   __ verify_oop(Rexception_obj);
1466   {
1467     const Register Rzero = __ zero_register(Rtemp);
1468     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1469   }
1470 
1471   __ bind(noException);
1472 
1473   // This frame is going away.  Fetch return value, so we can move it to
1474   // a new frame.
1475   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));

1476   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));

1477 #ifndef __SOFTFP__
1478   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1479 #endif
1480   // pop frame
1481   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1482 
1483   // Set initial stack state before pushing interpreter frames
1484   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1485   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1486   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1487 


























1488   __ add(SP, SP, Rtemp);

1489 
1490 #ifdef ASSERT
1491   // Compilers generate code that bangs the stack by as much as the
1492   // interpreter would need, so this stack banging should never
1493   // trigger a fault. Verify that it does not on non-product builds.
1494   // See if there is enough stack to push the deoptimized frames.
1495   if (UseStackBanging) {

1496     // The compiled method that we are deoptimizing was popped from the stack.
1497     // If the stack bang results in a stack overflow, we don't return to the
1498     // method that is being deoptimized. The stack overflow exception is
1499     // propagated to the caller of the deoptimized method. Need to get the pc
1500     // from the caller in LR and restore FP.
1501     __ ldr(LR, Address(R2, 0));
1502     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));

1503     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1504     __ arm_stack_overflow_check(R8, Rtemp);
1505   }
1506 #endif
1507   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1508 

1509   // Pick up the initial fp we should save
1510   // XXX Note: was ldr(FP, Address(FP));
1511 
1512   // The compiler no longer uses FP as a frame pointer for the
1513   // compiled code. It can be used by the allocator in C2 or to
1514   // remember the original SP for JSR292 call sites.
1515 
1516   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1517   // Deoptimization::fetch_unroll_info computes the right FP value and
1518   // stores it in Rublock.initial_info. This has been activated for ARM.
1519   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));

1520 
1521   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1522   __ mov(Rsender, SP);



1523   __ sub(SP, SP, Rtemp);

1524 
1525   // Push interpreter frames in a loop
1526   Label loop;
1527   __ bind(loop);
1528   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1529   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1530 
1531   __ raw_push(FP, LR);                                     // create new frame
1532   __ mov(FP, SP);
1533   __ sub(Rtemp, Rtemp, 2*wordSize);
1534 



1535   __ sub(SP, SP, Rtemp);

1536 
1537   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));



1538   __ mov(LR, 0);
1539   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));

1540 
1541   __ subs(R8, R8, 1);                               // decrement counter
1542   __ mov(Rsender, SP);
1543   __ b(loop, ne);
1544 
1545   // Re-push self-frame
1546   __ ldr(LR, Address(R2));
1547   __ raw_push(FP, LR);
1548   __ mov(FP, SP);
1549   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
1550 
1551   // Restore frame locals after moving the frame
1552   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));

1553   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));

1554 
1555 #ifndef __SOFTFP__
1556   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1557 #endif // !__SOFTFP__
1558 

1559 #ifdef ASSERT
1560   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1561   { Label L;
1562     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1563     __ cmp_32(Rkind, Rtemp);
1564     __ b(L, eq);
1565     __ stop("Rkind was overwritten");
1566     __ bind(L);
1567   }
1568 #endif

1569 
1570   // Call unpack_frames with proper arguments
1571   __ mov(R0, Rthread);
1572   __ mov(R1, Rkind);
1573 
1574   pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1575   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1576   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1577   if (pc_offset == -1) {
1578     pc_offset = __ offset();
1579   }
1580   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1581   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1582 
1583   // Collect return values, pop self-frame and jump to interpreter
1584   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));

1585   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));

1586   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
1587   // float return value registers are controlled by __ABI_HARD__.
1588   // This matters for vfp-sflt builds.
1589 #ifndef __SOFTFP__
1590   // Interpreter hard float
1591 #ifdef __ABI_HARD__
1592   // Compiler float return value in FP registers
1593   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1594 #else
1595   // Compiler float return value in integer registers,
1596   // copy to D0 for the interpreter (D0_tos <-- R1:R0)
1597   __ fmdrr(D0_tos, R0, R1);
1598 #endif
1599 #endif // !__SOFTFP__
1600   __ mov(SP, FP);
1601 
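  // SP = FP above drops the self-frame locals; popping FP and PC restores
  // the caller's frame pointer and continues at the pc pushed when the
  // self-frame was created.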
1602   __ pop(RegisterSet(FP) | RegisterSet(PC));

1603 
1604   __ flush();
1605 
1606   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1607                                            reexecute_offset, frame_size_in_words);
1608   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1609 }
1610 
1611 #ifdef COMPILER2
1612 
1613 //------------------------------generate_uncommon_trap_blob--------------------
1614 // Ought to generate an ideal graph & compile, but here's some hand-written
1615 // ARM assembly instead.
1616 void SharedRuntime::generate_uncommon_trap_blob() {
1617   // allocate space for the code
1618   ResourceMark rm;
1619 
1620   // setup code generation tools
1621   int pad = VerifyThread ? 512 : 0;
1622 #ifdef _LP64
1623   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1624 #else
1625   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1626   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1627   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1628 #endif
1629   // bypassed when code generation is useless
1630   MacroAssembler* masm               = new MacroAssembler(&buffer);
1631   const Register Rublock = R6;
1632   const Register Rsender = altFP_7_11;
1633   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
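  // Rublock will hold the UnrollBlock* returned by the runtime call below;
  // Rsender tracks the sender SP while the interpreter frames are pushed.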
1634 
1635   //
1636   // This is the entry point for all traps the compiler takes when it thinks
1637   // it cannot handle further execution of compiled code. The frame is
1638   // deoptimized in these cases and converted into interpreter frames for
1639   // execution.
1640   // The steps taken by this blob are as follows:
1641   //   - push a fake "unpack_frame"
1642   //   - call the C routine Deoptimization::uncommon_trap (this function
1643   //     packs the current compiled frame into vframe arrays and returns
1644   //     information about the number and size of interpreter frames which
1645   //     are equivalent to the frame which is being deoptimized)
1646   //   - deallocate the "unpack_frame"
1647   //   - deallocate the deoptimization frame
1648   //   - in a loop using the information returned in the previous step
1649   //     push interpreter frames;
1650   //   - create a dummy "unpack_frame"
1651   //   - call the C routine: Deoptimization::unpack_frames (this function
1652   //     lays out values on the interpreter frame which was just created)


1668   __ mov(Rublock, R0);
1669   __ reset_last_Java_frame(Rtemp);
1670   __ raw_pop(FP, LR);
1671 
1672 #ifdef ASSERT
1673   { Label L;
1674     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1675     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1676     __ b(L, eq);
1677     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1678     __ bind(L);
1679   }
1680 #endif
1681 
1682 
1683   // Set initial stack state before pushing interpreter frames
1684   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1685   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1686   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1687 
1688   __ add(SP, SP, Rtemp);
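  // The deoptimized compiled frame has now been removed; SP points into its
  // caller.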

1689 
1690   // See if it is enough stack to push deoptimized frames
1691 #ifdef ASSERT
1692   // Compilers generate code that bangs the stack by as much as the
1693   // interpreter would need, so this stack banging should never
1694   // trigger a fault. Verify that it does not on non-product builds.
1695   if (UseStackBanging) {

1696     // The compiled method that we are deoptimizing was popped from the stack.
1697     // If the stack bang results in a stack overflow, we don't return to the
1698     // method that is being deoptimized. The stack overflow exception is
1699     // propagated to the caller of the deoptimized method. Need to get the pc
1700     // from the caller in LR and restore FP.
1701     __ ldr(LR, Address(R2, 0));
1702     __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));

1703     __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1704     __ arm_stack_overflow_check(R8, Rtemp);
1705   }
1706 #endif
1707   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1708   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1709   __ mov(Rsender, SP);
1710   __ sub(SP, SP, Rtemp);


1711   //  __ ldr(FP, Address(FP));
1712   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
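  // As in the deopt blob above, FP is taken from UnrollBlock::initial_info,
  // the caller frame pointer computed by the runtime.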

1713 
1714   // Push interpreter frames in a loop
1715   Label loop;
1716   __ bind(loop);
1717   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1718   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1719 
1720   __ raw_push(FP, LR);                                     // create new frame
1721   __ mov(FP, SP);
1722   __ sub(Rtemp, Rtemp, 2*wordSize);
1723 
1724   __ sub(SP, SP, Rtemp);

1725 
1726   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1727   __ mov(LR, 0);
1728   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));

1729   __ subs(R8, R8, 1);                               // decrement counter
1730   __ mov(Rsender, SP);
1731   __ b(loop, ne);
1732 
1733   // Re-push self-frame
1734   __ ldr(LR, Address(R2));
1735   __ raw_push(FP, LR);
1736   __ mov(FP, SP);
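  // Unlike the deopt blob, no register save area is allocated here: an
  // uncommon trap has no return value to preserve across unpack_frames.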
1737 
1738   // Call unpack_frames with proper arguments
1739   __ mov(R0, Rthread);
1740   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1741   __ set_last_Java_frame(SP, FP, false, Rtemp);
1742   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1743   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1744   __ reset_last_Java_frame(Rtemp);
1745 
1746   __ mov(SP, FP);
1747   __ pop(RegisterSet(FP) | RegisterSet(PC));

1748 
1749   masm->flush();
1750   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
1751 }
1752 
1753 #endif // COMPILER2
1754 
1755 //------------------------------generate_handler_blob------
1756 //
1757 // Generate a special Compile2Runtime blob that saves all registers,
1758 // sets up the oopmap, and calls safepoint code to stop the compiled code
1759 // for a safepoint.
1760 //
1761 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1762   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1763 
1764   ResourceMark rm;
1765   CodeBuffer buffer("handler_blob", 256, 256);
1766   int frame_size_words;
1767   OopMapSet* oop_maps;
1768 
1769   bool cause_return = (poll_type == POLL_AT_RETURN);
1770 
1771   MacroAssembler* masm = new MacroAssembler(&buffer);
1772   address start = __ pc();
1773   oop_maps = new OopMapSet();
1774 
1775   if (!cause_return) {
1776     __ sub(SP, SP, 4); // make room for LR which may still be live
1777                        // here if we are coming from a c2 method

1778   }
1779 
1780   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
1781   if (!cause_return) {
1782     // update saved PC with correct value
1783     // need 2 steps because LR can be live in c2 method
1784     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1785     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1786   }
1787 
1788   __ mov(R0, Rthread);
1789   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1790   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1791   __ call(call_ptr);
1792   if (pc_offset == -1) {
1793     pc_offset = __ offset();
1794   }
1795   oop_maps->add_gc_map(pc_offset, map);
1796   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1797 
1798   // Check for pending exception
1799   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1800   __ cmp(Rtemp, 0);
1801 
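  // If there is no pending exception ('eq' from the comparison above),
  // restore the registers and return to the compiled code; otherwise fall
  // through, recover the exception pc and branch to the forward_exception stub.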
1802   if (!cause_return) {
1803     RegisterSaver::restore_live_registers(masm, false);
1804     __ pop(PC, eq);
1805     __ pop(Rexception_pc);
1806   } else {
1807     RegisterSaver::restore_live_registers(masm);
1808     __ bx(LR, eq);
1809     __ mov(Rexception_pc, LR);
1810   }

1811 
1812   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1813 
1814   __ flush();
1815 
1816   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1817 }
1818 
1819 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1820   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1821 
1822   ResourceMark rm;
1823   CodeBuffer buffer(name, 1000, 512);
1824   int frame_size_words;
1825   OopMapSet *oop_maps;
1826   int frame_complete;
1827 
1828   MacroAssembler* masm = new MacroAssembler(&buffer);
1829   Label pending_exception;
1830 

