1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "gc/shared/cardTableBarrierSet.hpp"
  36 #include "gc/shared/collectedHeap.inline.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/klass.inline.hpp"
  40 #include "prims/methodHandles.hpp"
  41 #include "runtime/biasedLocking.hpp"
  42 #include "runtime/interfaceSupport.inline.hpp"
  43 #include "runtime/objectMonitor.hpp"
  44 #include "runtime/os.hpp"
  45 #include "runtime/sharedRuntime.hpp"
  46 #include "runtime/stubRoutines.hpp"
  47 #include "utilities/macros.hpp"
  48 
  49 // Implementation of AddressLiteral
  50 
  51 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  52   switch (rtype) {
  53   case relocInfo::oop_type:
  54     // Oops are a special case. Normally they would be their own section
  55     // but in cases like icBuffer they are literals in the code stream that
  56     // we don't have a section for. We use none so that we get a literal address
  57     // which is always patchable.
  58     break;
  59   case relocInfo::external_word_type:
  60     _rspec = external_word_Relocation::spec(_target);
  61     break;
  62   case relocInfo::internal_word_type:
  63     _rspec = internal_word_Relocation::spec(_target);
  64     break;
  65   case relocInfo::opt_virtual_call_type:
  66     _rspec = opt_virtual_call_Relocation::spec();
  67     break;
  68   case relocInfo::static_call_type:
  69     _rspec = static_call_Relocation::spec();
  70     break;
  71   case relocInfo::runtime_call_type:
  72     _rspec = runtime_call_Relocation::spec();
  73     break;
  74   case relocInfo::poll_type:
  75   case relocInfo::poll_return_type:
  76     _rspec = Relocation::spec_simple(rtype);
  77     break;
  78   case relocInfo::none:
  79     break;
  80   default:
  81     ShouldNotReachHere();
  82     break;
  83   }
  84 }
  85 
  86 // Initially added to the Assembler interface as a pure virtual:
  87 //   RegisterConstant delayed_value(..)
  88 // for:
  89 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
  90 // this was subsequently modified to its present name and return type
  91 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  92                                                       Register tmp,
  93                                                       int offset) {
  94   ShouldNotReachHere();
  95   return RegisterOrConstant(-1);
  96 }
  97 
  98 
  99 #ifdef AARCH64
 100 // Note: ARM32 version is OS dependent
 101 void MacroAssembler::breakpoint(AsmCondition cond) {
 102   if (cond == al) {
 103     brk();
 104   } else {
 105     Label L;
 106     b(L, inverse(cond));
 107     brk();
 108     bind(L);
 109   }
 110 }
 111 #endif // AARCH64
 112 
 113 
 114 // virtual method calling
 115 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 116                                            Register vtable_index,
 117                                            Register method_result) {
 118   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 119   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
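       // i.e. method_result = *(recv_klass + vtable_start_offset
       //                        + vtable_index * wordSize + method_offset)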
 120   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 121   ldr(method_result, Address(recv_klass, base_offset));
 122 }
 123 
 124 
 125 // Simplified, combined version, good for typical uses.
 126 // Falls through on failure.
 127 void MacroAssembler::check_klass_subtype(Register sub_klass,
 128                                          Register super_klass,
 129                                          Register temp_reg,
 130                                          Register temp_reg2,
 131                                          Register temp_reg3,
 132                                          Label& L_success) {
 133   Label L_failure;
 134   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 135   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 136   bind(L_failure);
 137 }
 138 
 139 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 140                                                    Register super_klass,
 141                                                    Register temp_reg,
 142                                                    Register temp_reg2,
 143                                                    Label* L_success,
 144                                                    Label* L_failure,
 145                                                    Label* L_slow_path) {
 146 
 147   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 148   const Register super_check_offset = temp_reg2;
 149 
 150   Label L_fallthrough;
 151   int label_nulls = 0;
 152   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 153   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 154   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 155   assert(label_nulls <= 1, "at most one NULL in the batch");
 156 
 157   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 158   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 159   Address super_check_offset_addr(super_klass, sco_offset);
 160 
 161   // If the pointers are equal, we are done (e.g., String[] elements).
 162   // This self-check enables sharing of secondary supertype arrays among
 163   // non-primary types such as array-of-interface.  Otherwise, each such
 164   // type would need its own customized SSA.
 165   // We move this check to the front of the fast path because many
 166   // type checks are in fact trivially successful in this manner,
 167   // so we get a nicely predicted branch right at the start of the check.
 168   cmp(sub_klass, super_klass);
 169   b(*L_success, eq);
 170 
 171   // Check the supertype display:
 172   ldr_u32(super_check_offset, super_check_offset_addr);
 173 
 174   Address super_check_addr(sub_klass, super_check_offset);
 175   ldr(temp_reg, super_check_addr); // load displayed supertype
 176   cmp(super_klass, temp_reg);
 177 
 178   // This check has worked decisively for primary supers.
 179   // Secondary supers are sought in the super_cache ('super_cache_addr').
 180   // (Secondary supers are interfaces and very deeply nested subtypes.)
 181   // This works in the same check above because of a tricky aliasing
 182   // between the super_cache and the primary super display elements.
 183   // (The 'super_check_addr' can address either, as the case requires.)
 184   // Note that the cache is updated below if it does not help us find
 185   // what we need immediately.
 186   // So if it was a primary super, we can just fail immediately.
 187   // Otherwise, it's the slow path for us (no success at this point).
 188 
 189   b(*L_success, eq);
 190   cmp_32(super_check_offset, sc_offset);
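       // At this point, eq means super_check_offset hit the secondary_super_cache slot, so only
       // the slow path can decide (the cache may not contain this super yet); ne means a primary
       // super display entry was checked and did not match, which is a definitive failure.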
 191   if (L_failure == &L_fallthrough) {
 192     b(*L_slow_path, eq);
 193   } else {
 194     b(*L_failure, ne);
 195     if (L_slow_path != &L_fallthrough) {
 196       b(*L_slow_path);
 197     }
 198   }
 199 
 200   bind(L_fallthrough);
 201 }
 202 
 203 
 204 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 205                                                    Register super_klass,
 206                                                    Register temp_reg,
 207                                                    Register temp2_reg,
 208                                                    Register temp3_reg,
 209                                                    Label* L_success,
 210                                                    Label* L_failure,
 211                                                    bool set_cond_codes) {
 212 #ifdef AARCH64
 213   NOT_IMPLEMENTED();
 214 #else
 215   // Note: if used by code that expects a register to be 0 on success,
 216   // this register must be temp_reg and set_cond_codes must be true
 217 
 218   Register saved_reg = noreg;
 219 
 220   // get additional tmp registers
 221   if (temp3_reg == noreg) {
 222     saved_reg = temp3_reg = LR;
 223     push(saved_reg);
 224   }
 225 
 226   assert(temp2_reg != noreg, "need all the temporary registers");
 227   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 228 
 229   Register cmp_temp = temp_reg;
 230   Register scan_temp = temp3_reg;
 231   Register count_temp = temp2_reg;
 232 
 233   Label L_fallthrough;
 234   int label_nulls = 0;
 235   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 236   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 237   assert(label_nulls <= 1, "at most one NULL in the batch");
 238 
 239   // a couple of useful fields in sub_klass:
 240   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 241   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 242   Address secondary_supers_addr(sub_klass, ss_offset);
 243   Address super_cache_addr(     sub_klass, sc_offset);
 244 
 245 #ifndef PRODUCT
 246   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 247 #endif
 248 
 249   // We will consult the secondary-super array.
 250   ldr(scan_temp, Address(sub_klass, ss_offset));
 251 
 252   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  253   // else search_key is simply the (uncompressed) super_klass
 254   Register search_key = super_klass;
 255 
 256   // Load the array length.
 257   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 258   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 259 
 260   add(count_temp, count_temp, 1);
 261 
 262   Label L_loop, L_setnz_and_fail, L_fail;
 263 
 264   // Top of search loop
 265   bind(L_loop);
 266   // Notes:
 267   //  scan_temp starts at the array elements
 268   //  count_temp is 1+size
 269   subs(count_temp, count_temp, 1);
 270   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 271     // direct jump to L_failure if failed and no cleanup needed
  272     b(*L_failure, eq); // not found and no cleanup needed
 273   } else {
 274     b(L_fail, eq); // not found in the array
 275   }
 276 
 277   // Load next super to check
 278   // In the array of super classes elements are pointer sized.
 279   int element_size = wordSize;
 280   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 281 
 282   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 283   subs(cmp_temp, cmp_temp, search_key);
 284 
 285   // A miss means we are NOT a subtype and need to keep looping
 286   b(L_loop, ne);
 287 
 288   // Falling out the bottom means we found a hit; we ARE a subtype
 289 
 290   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 291 
 292   // Success.  Cache the super we found and proceed in triumph.
 293   str(super_klass, Address(sub_klass, sc_offset));
 294 
 295   if (saved_reg != noreg) {
 296     // Return success
 297     pop(saved_reg);
 298   }
 299 
 300   b(*L_success);
 301 
 302   bind(L_fail);
 303   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 304   if (set_cond_codes) {
 305     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 306   }
 307   if (saved_reg != noreg) {
 308     pop(saved_reg);
 309   }
 310   if (L_failure != &L_fallthrough) {
 311     b(*L_failure);
 312   }
 313 
 314   bind(L_fallthrough);
 315 #endif
 316 }
 317 
 318 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 319 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 320   assert_different_registers(params_base, params_count);
 321   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
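       // i.e. the returned address is params_base + (params_count - 1) * Interpreter::stackElementSize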
 322   return Address(tmp, -Interpreter::stackElementSize);
 323 }
 324 
 325 
 326 void MacroAssembler::align(int modulus) {
 327   while (offset() % modulus != 0) {
 328     nop();
 329   }
 330 }
 331 
 332 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 333                                         Register last_java_fp,
 334                                         bool save_last_java_pc,
 335                                         Register tmp) {
 336   int pc_offset;
 337   if (last_java_fp != noreg) {
 338     // optional
 339     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 340     _fp_saved = true;
 341   } else {
 342     _fp_saved = false;
 343   }
 344   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 345 #ifdef AARCH64
 346     pc_offset = mov_pc_to(tmp);
 347     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 348 #else
 349     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 350     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 351 #endif
 352     _pc_saved = true;
 353   } else {
 354     _pc_saved = false;
 355     pc_offset = -1;
 356   }
 357   // According to the comment in javaFrameAnchor, SP must be saved last, so that other
 358   // entries are valid when SP is set.
 359 
 360   // However, this is probably not a strong constraint since, for instance, PC is
 361   // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
 362   // we now write the fields in the expected order but we have not added a StoreStore
 363   // barrier.
 364 
 365   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 366   // to update oop_map offsets) and a StoreStore barrier might be needed.
 367 
 368   if (last_java_sp == noreg) {
 369     last_java_sp = SP; // always saved
 370   }
 371 #ifdef AARCH64
 372   if (last_java_sp == SP) {
 373     mov(tmp, SP);
 374     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 375   } else {
 376     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 377   }
 378 #else
 379   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 380 #endif
 381 
 382   return pc_offset; // for oopmaps
 383 }
 384 
 385 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 386   const Register Rzero = zero_register(tmp);
 387   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 388   if (_fp_saved) {
 389     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 390   }
 391   if (_pc_saved) {
 392     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 393   }
 394 }
 395 
 396 
 397 // Implementation of call_VM versions
 398 
 399 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 400   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 401   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 402 
 403 #ifndef AARCH64
 404   // Safer to save R9 here since callers may have been written
 405   // assuming R9 survives. This is suboptimal but is not worth
 406   // optimizing for the few platforms where R9 is scratched.
 407   push(RegisterSet(R4) | R9ifScratched);
 408   mov(R4, SP);
 409   bic(SP, SP, StackAlignmentInBytes - 1);
 410 #endif // AARCH64
 411   call(entry_point, relocInfo::runtime_call_type);
 412 #ifndef AARCH64
 413   mov(SP, R4);
 414   pop(RegisterSet(R4) | R9ifScratched);
 415 #endif // AARCH64
 416 }
 417 
 418 
 419 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 420   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 421   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 422 
 423   const Register tmp = Rtemp;
 424   assert_different_registers(oop_result, tmp);
 425 
 426   set_last_Java_frame(SP, FP, true, tmp);
 427 
 428 #ifdef ASSERT
 429   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 430 #endif // ASSERT
 431 
 432 #ifndef AARCH64
 433 #if R9_IS_SCRATCHED
 434   // Safer to save R9 here since callers may have been written
 435   // assuming R9 survives. This is suboptimal but is not worth
 436   // optimizing for the few platforms where R9 is scratched.
 437 
 438   // Note: R9 cannot be saved above the saved SP (some calls expect, for
 439   // instance, the Java stack top at the saved SP)
 440   // => once SP is saved (with set_last_Java_frame), decrease SP before rounding
 441   // to ensure the slot at SP will be free for R9.
 442   sub(SP, SP, 4);
 443   bic(SP, SP, StackAlignmentInBytes - 1);
 444   str(R9, Address(SP, 0));
 445 #else
 446   bic(SP, SP, StackAlignmentInBytes - 1);
 447 #endif // R9_IS_SCRATCHED
 448 #endif
 449 
 450   mov(R0, Rthread);
 451   call(entry_point, relocInfo::runtime_call_type);
 452 
 453 #ifndef AARCH64
 454 #if R9_IS_SCRATCHED
 455   ldr(R9, Address(SP, 0));
 456 #endif
 457   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 458 #endif
 459 
 460   reset_last_Java_frame(tmp);
 461 
 462   // C++ interp handles this in the interpreter
 463   check_and_handle_popframe();
 464   check_and_handle_earlyret();
 465 
 466   if (check_exceptions) {
 467     // check for pending exceptions
 468     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 469 #ifdef AARCH64
 470     Label L;
 471     cbz(tmp, L);
 472     mov_pc_to(Rexception_pc);
 473     b(StubRoutines::forward_exception_entry());
 474     bind(L);
 475 #else
 476     cmp(tmp, 0);
 477     mov(Rexception_pc, PC, ne);
 478     b(StubRoutines::forward_exception_entry(), ne);
 479 #endif // AARCH64
 480   }
 481 
 482   // get oop result if there is one and reset the value in the thread
 483   if (oop_result->is_valid()) {
 484     get_vm_result(oop_result, tmp);
 485   }
 486 }
 487 
 488 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 489   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 490 }
 491 
 492 
 493 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 494   assert (arg_1 == R1, "fixed register for arg_1");
 495   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 496 }
 497 
 498 
 499 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 500   assert (arg_1 == R1, "fixed register for arg_1");
 501   assert (arg_2 == R2, "fixed register for arg_2");
 502   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 503 }
 504 
 505 
 506 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 507   assert (arg_1 == R1, "fixed register for arg_1");
 508   assert (arg_2 == R2, "fixed register for arg_2");
 509   assert (arg_3 == R3, "fixed register for arg_3");
 510   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 511 }
 512 
 513 
 514 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 515   // Not used on ARM
 516   Unimplemented();
 517 }
 518 
 519 
 520 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 521   // Not used on ARM
 522   Unimplemented();
 523 }
 524 
 525 
 526 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 527 // Not used on ARM
 528   Unimplemented();
 529 }
 530 
 531 
 532 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 533   // Not used on ARM
 534   Unimplemented();
 535 }
 536 
 537 // Raw call, without saving/restoring registers, exception handling, etc.
 538 // Mainly used from various stubs.
 539 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 540   const Register tmp = Rtemp; // Rtemp free since scratched by call
 541   set_last_Java_frame(SP, FP, true, tmp);
 542 #if R9_IS_SCRATCHED
 543   if (save_R9_if_scratched) {
 544     // Note: Saving also R10 for alignment.
 545     push(RegisterSet(R9, R10));
 546   }
 547 #endif
 548   mov(R0, Rthread);
 549   call(entry_point, relocInfo::runtime_call_type);
 550 #if R9_IS_SCRATCHED
 551   if (save_R9_if_scratched) {
 552     pop(RegisterSet(R9, R10));
 553   }
 554 #endif
 555   reset_last_Java_frame(tmp);
 556 }
 557 
 558 void MacroAssembler::call_VM_leaf(address entry_point) {
 559   call_VM_leaf_helper(entry_point, 0);
 560 }
 561 
 562 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 563   assert (arg_1 == R0, "fixed register for arg_1");
 564   call_VM_leaf_helper(entry_point, 1);
 565 }
 566 
 567 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 568   assert (arg_1 == R0, "fixed register for arg_1");
 569   assert (arg_2 == R1, "fixed register for arg_2");
 570   call_VM_leaf_helper(entry_point, 2);
 571 }
 572 
 573 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 574   assert (arg_1 == R0, "fixed register for arg_1");
 575   assert (arg_2 == R1, "fixed register for arg_2");
 576   assert (arg_3 == R2, "fixed register for arg_3");
 577   call_VM_leaf_helper(entry_point, 3);
 578 }
 579 
 580 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 581   assert (arg_1 == R0, "fixed register for arg_1");
 582   assert (arg_2 == R1, "fixed register for arg_2");
 583   assert (arg_3 == R2, "fixed register for arg_3");
 584   assert (arg_4 == R3, "fixed register for arg_4");
 585   call_VM_leaf_helper(entry_point, 4);
 586 }
 587 
 588 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 589   assert_different_registers(oop_result, tmp);
 590   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 591   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 592   verify_oop(oop_result);
 593 }
 594 
 595 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 596   assert_different_registers(metadata_result, tmp);
 597   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 598   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 599 }
 600 
 601 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 602   if (arg2.is_register()) {
 603     add(dst, arg1, arg2.as_register());
 604   } else {
 605     add(dst, arg1, arg2.as_constant());
 606   }
 607 }
 608 
 609 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 610 #ifdef AARCH64
 611   if (c == 0) {
 612     if (rd != rn) {
 613       mov(rd, rn);
 614     }
 615     return;
 616   }
 617   if (c < 0) {
 618     sub_slow(rd, rn, -c);
 619     return;
 620   }
 621   if (c > right_n_bits(24)) {
 622     guarantee(rd != rn, "no large add_slow with only one register");
 623     mov_slow(rd, c);
 624     add(rd, rn, rd);
 625   } else {
 626     int lo = c & right_n_bits(12);
 627     int hi = (c >> 12) & right_n_bits(12);
 628     if (lo != 0) {
 629       add(rd, rn, lo, lsl0);
 630     }
 631     if (hi != 0) {
 632       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 633     }
 634   }
 635 #else
 636   // This function is used in the compiler for handling large frame offsets
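       // For example, c == 0x1008 is emitted as "add rd, rn, #0x008" followed by
       // "add rd, rd, #0x1000"; both parts are valid rotated immediates.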
 637   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 638     return sub(rd, rn, (-c));
 639   }
 640   int low = c & 0x3fc;
 641   if (low != 0) {
 642     add(rd, rn, low);
 643     rn = rd;
 644   }
 645   if (c & ~0x3fc) {
 646     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 647     add(rd, rn, c & ~0x3fc);
 648   } else if (rd != rn) {
 649     assert(c == 0, "");
 650     mov(rd, rn); // need to generate at least one move!
 651   }
 652 #endif // AARCH64
 653 }
 654 
 655 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 656 #ifdef AARCH64
 657   if (c <= 0) {
 658     add_slow(rd, rn, -c);
 659     return;
 660   }
 661   if (c > right_n_bits(24)) {
 662     guarantee(rd != rn, "no large sub_slow with only one register");
 663     mov_slow(rd, c);
 664     sub(rd, rn, rd);
 665   } else {
 666     int lo = c & right_n_bits(12);
 667     int hi = (c >> 12) & right_n_bits(12);
 668     if (lo != 0) {
 669       sub(rd, rn, lo, lsl0);
 670     }
 671     if (hi != 0) {
 672       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 673     }
 674   }
 675 #else
 676   // This function is used in the compiler for handling large frame offsets
 677   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 678     return add(rd, rn, (-c));
 679   }
 680   int low = c & 0x3fc;
 681   if (low != 0) {
 682     sub(rd, rn, low);
 683     rn = rd;
 684   }
 685   if (c & ~0x3fc) {
 686     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 687     sub(rd, rn, c & ~0x3fc);
 688   } else if (rd != rn) {
 689     assert(c == 0, "");
 690     mov(rd, rn); // need to generate at least one move!
 691   }
 692 #endif // AARCH64
 693 }
 694 
 695 void MacroAssembler::mov_slow(Register rd, address addr) {
 696   // do *not* call the non relocated mov_related_address
 697   mov_slow(rd, (intptr_t)addr);
 698 }
 699 
 700 void MacroAssembler::mov_slow(Register rd, const char *str) {
 701   mov_slow(rd, (intptr_t)str);
 702 }
 703 
 704 #ifdef AARCH64
 705 
 706 // Common code for mov_slow and instr_count_for_mov_slow.
 707 // Returns number of instructions of mov_slow pattern,
 708 // generating it if non-null MacroAssembler is given.
 709 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
 710   // This code pattern is matched in NativeInstruction::is_mov_slow.
 711   // Update it when this code is modified.
 712 
 713   const intx mask = right_n_bits(16);
 714   // 1 movz instruction
 715   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 716     if ((c & ~(mask << base_shift)) == 0) {
 717       if (masm != NULL) {
 718         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 719       }
 720       return 1;
 721     }
 722   }
 723   // 1 movn instruction
 724   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 725     if (((~c) & ~(mask << base_shift)) == 0) {
 726       if (masm != NULL) {
 727         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 728       }
 729       return 1;
 730     }
 731   }
 732   // 1 orr instruction
 733   {
 734     LogicalImmediate imm(c, false);
 735     if (imm.is_encoded()) {
 736       if (masm != NULL) {
 737         masm->orr(rd, ZR, imm);
 738       }
 739       return 1;
 740     }
 741   }
 742   // 1 movz/movn + up to 3 movk instructions
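       // For example, c == 0xffff0000ffff1234 has two 0xffff halfwords and one zero halfword,
       // so def_bits becomes 0xffff and the constant is materialized as
       //   movn rd, #0xedcb            // rd = 0xffffffffffff1234
       //   movk rd, #0x0000, lsl #32   // rd = 0xffff0000ffff1234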
 743   int zeroes = 0;
 744   int ones = 0;
 745   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 746     int part = (c >> base_shift) & mask;
 747     if (part == 0) {
 748       ++zeroes;
 749     } else if (part == mask) {
 750       ++ones;
 751     }
 752   }
 753   int def_bits = 0;
 754   if (ones > zeroes) {
 755     def_bits = mask;
 756   }
 757   int inst_count = 0;
 758   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 759     int part = (c >> base_shift) & mask;
 760     if (part != def_bits) {
 761       if (masm != NULL) {
 762         if (inst_count > 0) {
 763           masm->movk(rd, part, base_shift);
 764         } else {
 765           if (def_bits == 0) {
 766             masm->movz(rd, part, base_shift);
 767           } else {
 768             masm->movn(rd, ~part & mask, base_shift);
 769           }
 770         }
 771       }
 772       inst_count++;
 773     }
 774   }
 775   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 776   return inst_count;
 777 }
 778 
 779 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 780 #ifdef ASSERT
 781   int off = offset();
 782 #endif
 783   (void) mov_slow_helper(rd, c, this);
 784   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 785 }
 786 
 787 // Counts instructions generated by mov_slow(rd, c).
 788 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 789   return mov_slow_helper(noreg, c, NULL);
 790 }
 791 
 792 int MacroAssembler::instr_count_for_mov_slow(address c) {
 793   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 794 }
 795 
 796 #else
 797 
 798 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 799   if (AsmOperand::is_rotated_imm(c)) {
 800     mov(rd, c, cond);
 801   } else if (AsmOperand::is_rotated_imm(~c)) {
 802     mvn(rd, ~c, cond);
 803   } else if (VM_Version::supports_movw()) {
 804     movw(rd, c & 0xffff, cond);
 805     if ((unsigned int)c >> 16) {
 806       movt(rd, (unsigned int)c >> 16, cond);
 807     }
 808   } else {
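         // For example, without movw support c == 0x00ff00ff is built as
         //   mov rd, #0x000000ff
         //   orr rd, rd, #0x00ff0000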
 809     // Find first non-zero bit
 810     int shift = 0;
 811     while ((c & (3 << shift)) == 0) {
 812       shift += 2;
 813     }
 814     // Put the least significant part of the constant
 815     int mask = 0xff << shift;
 816     mov(rd, c & mask, cond);
 817     // Add up to 3 other parts of the constant;
 818     // each of them can be represented as rotated_imm
 819     if (c & (mask << 8)) {
 820       orr(rd, rd, c & (mask << 8), cond);
 821     }
 822     if (c & (mask << 16)) {
 823       orr(rd, rd, c & (mask << 16), cond);
 824     }
 825     if (c & (mask << 24)) {
 826       orr(rd, rd, c & (mask << 24), cond);
 827     }
 828   }
 829 }
 830 
 831 #endif // AARCH64
 832 
 833 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 834 #ifdef AARCH64
 835                              bool patchable
 836 #else
 837                              AsmCondition cond
 838 #endif
 839                              ) {
 840 
 841   if (o == NULL) {
 842 #ifdef AARCH64
 843     if (patchable) {
 844       nop();
 845     }
 846     mov(rd, ZR);
 847 #else
 848     mov(rd, 0, cond);
 849 #endif
 850     return;
 851   }
 852 
 853   if (oop_index == 0) {
 854     oop_index = oop_recorder()->allocate_oop_index(o);
 855   }
 856   relocate(oop_Relocation::spec(oop_index));
 857 
 858 #ifdef AARCH64
 859   if (patchable) {
 860     nop();
 861   }
 862   ldr(rd, pc());
 863 #else
 864   if (VM_Version::supports_movw()) {
 865     movw(rd, 0, cond);
 866     movt(rd, 0, cond);
 867   } else {
 868     ldr(rd, Address(PC), cond);
 869     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 870     nop();
 871   }
 872 #endif
 873 }
 874 
 875 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 876   if (o == NULL) {
 877 #ifdef AARCH64
 878     if (patchable) {
 879       nop();
 880     }
 881 #endif
 882     mov(rd, 0);
 883     return;
 884   }
 885 
 886   if (metadata_index == 0) {
 887     metadata_index = oop_recorder()->allocate_metadata_index(o);
 888   }
 889   relocate(metadata_Relocation::spec(metadata_index));
 890 
 891 #ifdef AARCH64
 892   if (patchable) {
 893     nop();
 894   }
 895 #ifdef COMPILER2
 896   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 897     mov_slow(rd, (address)o);
 898     return;
 899   }
 900 #endif
 901   ldr(rd, pc());
 902 #else
 903   if (VM_Version::supports_movw()) {
 904     movw(rd, ((int)o) & 0xffff);
 905     movt(rd, (unsigned int)o >> 16);
 906   } else {
 907     ldr(rd, Address(PC));
 908     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 909     nop();
 910   }
 911 #endif // AARCH64
 912 }
 913 
 914 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 915   Label skip_constant;
 916   union {
 917     jfloat f;
 918     jint i;
 919   } accessor;
 920   accessor.f = c;
 921 
 922 #ifdef AARCH64
 923   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 924   Label L;
 925   ldr_s(fd, target(L));
 926   b(skip_constant);
 927   bind(L);
 928   emit_int32(accessor.i);
 929   bind(skip_constant);
 930 #else
 931   flds(fd, Address(PC), cond);
 932   b(skip_constant);
 933   emit_int32(accessor.i);
 934   bind(skip_constant);
 935 #endif // AARCH64
 936 }
 937 
 938 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 939   Label skip_constant;
 940   union {
 941     jdouble d;
 942     jint i[2];
 943   } accessor;
 944   accessor.d = c;
 945 
 946 #ifdef AARCH64
 947   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 948   Label L;
 949   ldr_d(fd, target(L));
 950   b(skip_constant);
 951   align(wordSize);
 952   bind(L);
 953   emit_int32(accessor.i[0]);
 954   emit_int32(accessor.i[1]);
 955   bind(skip_constant);
 956 #else
 957   fldd(fd, Address(PC), cond);
 958   b(skip_constant);
 959   emit_int32(accessor.i[0]);
 960   emit_int32(accessor.i[1]);
 961   bind(skip_constant);
 962 #endif // AARCH64
 963 }
 964 
 965 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 966   intptr_t addr = (intptr_t) address_of_global;
 967 #ifdef AARCH64
 968   assert((addr & 0x3) == 0, "address should be aligned");
 969 
 970   // FIXME: TODO
 971   if (false && page_reachable_from_cache(address_of_global)) {
 972     assert(false,"TODO: relocate");
 973     //relocate();
 974     adrp(reg, address_of_global);
 975     ldrsw(reg, Address(reg, addr & 0xfff));
 976   } else {
 977     mov_slow(reg, addr & ~0x3fff);
 978     ldrsw(reg, Address(reg, addr & 0x3fff));
 979   }
 980 #else
 981   mov_slow(reg, addr & ~0xfff);
 982   ldr(reg, Address(reg, addr & 0xfff));
 983 #endif
 984 }
 985 
 986 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 987 #ifdef AARCH64
 988   intptr_t addr = (intptr_t) address_of_global;
 989   assert ((addr & 0x7) == 0, "address should be aligned");
 990   mov_slow(reg, addr & ~0x7fff);
 991   ldr(reg, Address(reg, addr & 0x7fff));
 992 #else
 993   ldr_global_s32(reg, address_of_global);
 994 #endif
 995 }
 996 
 997 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
 998   intptr_t addr = (intptr_t) address_of_global;
 999   mov_slow(reg, addr & ~0xfff);
1000   ldrb(reg, Address(reg, addr & 0xfff));
1001 }
1002 
1003 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1004 #ifdef AARCH64
1005   switch (bits) {
1006     case  8: uxtb(rd, rn); break;
1007     case 16: uxth(rd, rn); break;
1008     case 32: mov_w(rd, rn); break;
1009     default: ShouldNotReachHere();
1010   }
1011 #else
1012   if (bits <= 8) {
1013     andr(rd, rn, (1 << bits) - 1);
1014   } else if (bits >= 24) {
1015     bic(rd, rn, -1 << bits);
1016   } else {
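         // Shift left then logically shift right by (32 - bits) to clear the upper bits.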
1017     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1018     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1019   }
1020 #endif
1021 }
1022 
1023 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1024 #ifdef AARCH64
1025   switch (bits) {
1026     case  8: sxtb(rd, rn); break;
1027     case 16: sxth(rd, rn); break;
1028     case 32: sxtw(rd, rn); break;
1029     default: ShouldNotReachHere();
1030   }
1031 #else
1032   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1033   mov(rd, AsmOperand(rd, asr, 32 - bits));
1034 #endif
1035 }
1036 
1037 #ifndef AARCH64
1038 
1039 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1040                                Register rn_lo, Register rn_hi,
1041                                AsmCondition cond) {
1042   if (rd_lo != rn_hi) {
1043     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1044     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1045   } else if (rd_hi != rn_lo) {
1046     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1047     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1048   } else {
1049     eor(rd_lo, rd_hi, rd_lo, cond);
1050     eor(rd_hi, rd_lo, rd_hi, cond);
1051     eor(rd_lo, rd_hi, rd_lo, cond);
1052   }
1053 }
1054 
1055 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1056                                 Register rn_lo, Register rn_hi,
1057                                 AsmShift shift, Register count) {
1058   Register tmp;
1059   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1060     tmp = rd_lo;
1061   } else {
1062     tmp = rd_hi;
1063   }
1064   assert_different_registers(tmp, count, rn_lo, rn_hi);
1065 
1066   subs(tmp, count, 32);
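       // After subs: pl means count >= 32 (the word shifted across comes from a single input
       // word, shifted by count - 32); mi means count < 32 (both input words contribute, with
       // tmp recomputed as 32 - count by the rsb below).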
1067   if (shift == lsl) {
1068     assert_different_registers(rd_hi, rn_lo);
1069     assert_different_registers(count, rd_hi);
1070     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1071     rsb(tmp, count, 32, mi);
1072     if (rd_hi == rn_hi) {
1073       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1074       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1075     } else {
1076       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1077       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1078     }
1079     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1080   } else {
1081     assert_different_registers(rd_lo, rn_hi);
1082     assert_different_registers(rd_lo, count);
1083     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1084     rsb(tmp, count, 32, mi);
1085     if (rd_lo == rn_lo) {
1086       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1087       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1088     } else {
1089       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1090       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1091     }
1092     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1093   }
1094 }
1095 
1096 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1097                                 Register rn_lo, Register rn_hi,
1098                                 AsmShift shift, int count) {
1099   assert(count != 0 && (count & ~63) == 0, "must be");
1100 
1101   if (shift == lsl) {
1102     assert_different_registers(rd_hi, rn_lo);
1103     if (count >= 32) {
1104       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1105       mov(rd_lo, 0);
1106     } else {
1107       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1108       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1109       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1110     }
1111   } else {
1112     assert_different_registers(rd_lo, rn_hi);
1113     if (count >= 32) {
1114       if (count == 32) {
1115         mov(rd_lo, rn_hi);
1116       } else {
1117         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1118       }
1119       if (shift == asr) {
1120         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1121       } else {
1122         mov(rd_hi, 0);
1123       }
1124     } else {
1125       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1126       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1127       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1128     }
1129   }
1130 }
1131 #endif // !AARCH64
1132 
1133 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
1134   // This code pattern is matched in NativeInstruction::skip_verify_oop.
1135   // Update it when this code is modified.
1136   if (!VerifyOops) return;
1137 
1138   char buffer[64];
1139 #ifdef COMPILER1
1140   if (CommentedAssembly) {
1141     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1142     block_comment(buffer);
1143   }
1144 #endif
1145   const char* msg_buffer = NULL;
1146   {
1147     ResourceMark rm;
1148     stringStream ss;
1149     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1150     msg_buffer = code_string(ss.as_string());
1151   }
1152 
1153   save_all_registers();
1154 
1155   if (reg != R2) {
1156       mov(R2, reg);                              // oop to verify
1157   }
1158   mov(R1, SP);                                   // register save area
1159 
1160   Label done;
1161   InlinedString Lmsg(msg_buffer);
1162   ldr_literal(R0, Lmsg);                         // message
1163 
1164   // call indirectly to solve generation ordering problem
1165   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1166   call(Rtemp);
1167 
1168   restore_all_registers();
1169 
1170   b(done);
1171 #ifdef COMPILER2
1172   int off = offset();
1173 #endif
1174   bind_literal(Lmsg);
1175 #ifdef COMPILER2
1176   if (offset() - off == 1 * wordSize) {
1177     // no padding, so insert nop for worst-case sizing
1178     nop();
1179   }
1180 #endif
1181   bind(done);
1182 }
1183 
1184 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1185   if (!VerifyOops) return;
1186 
1187   const char* msg_buffer = NULL;
1188   {
1189     ResourceMark rm;
1190     stringStream ss;
1191     if ((addr.base() == SP) && (addr.index()==noreg)) {
1192       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1193     } else {
1194       ss.print("verify_oop_addr: %s", s);
1195     }
1196     ss.print(" (%s:%d)", file, line);
1197     msg_buffer = code_string(ss.as_string());
1198   }
1199 
1200   int push_size = save_all_registers();
1201 
1202   if (addr.base() == SP) {
1203     // computes an addr that takes into account the push
1204     if (addr.index() != noreg) {
1205       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1206       add(new_base, SP, push_size);
1207       addr = addr.rebase(new_base);
1208     } else {
1209       addr = addr.plus_disp(push_size);
1210     }
1211   }
1212 
1213   ldr(R2, addr);                                 // oop to verify
1214   mov(R1, SP);                                   // register save area
1215 
1216   Label done;
1217   InlinedString Lmsg(msg_buffer);
1218   ldr_literal(R0, Lmsg);                         // message
1219 
1220   // call indirectly to solve generation ordering problem
1221   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1222   call(Rtemp);
1223 
1224   restore_all_registers();
1225 
1226   b(done);
1227   bind_literal(Lmsg);
1228   bind(done);
1229 }
1230 
1231 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1232   if (needs_explicit_null_check(offset)) {
1233 #ifdef AARCH64
1234     ldr(ZR, Address(reg));
1235 #else
1236     assert_different_registers(reg, tmp);
1237     if (tmp == noreg) {
1238       tmp = Rtemp;
1239       assert((! Thread::current()->is_Compiler_thread()) ||
1240              (! (ciEnv::current()->task() == NULL)) ||
1241              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1242              "Rtemp not available in C2"); // explicit tmp register required
1243       // XXX: could we mark the code buffer as not compatible with C2 ?
1244     }
1245     ldr(tmp, Address(reg));
1246 #endif
1247   }
1248 }
1249 
1250 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1251 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1252                                  RegisterOrConstant size_expression, Label& slow_case) {
1253   if (!Universe::heap()->supports_inline_contig_alloc()) {
1254     b(slow_case);
1255     return;
1256   }
1257 
1258   CollectedHeap* ch = Universe::heap();
1259 
1260   const Register top_addr = tmp1;
1261   const Register heap_end = tmp2;
1262 
1263   if (size_expression.is_register()) {
1264     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1265   } else {
1266     assert_different_registers(obj, obj_end, top_addr, heap_end);
1267   }
1268 
1269   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1270   if (load_const) {
1271     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1272   } else {
1273     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1274   }
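       // Bump-the-pointer allocation: read the current heap top, compute the new top and try
       // to publish it (LL/SC on AArch64, CAS on 32-bit ARM), retrying if another thread
       // allocated concurrently.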
1275   // Calculate new heap_top by adding the size of the object
1276   Label retry;
1277   bind(retry);
1278 
1279 #ifdef AARCH64
1280   ldxr(obj, top_addr);
1281 #else
1282   ldr(obj, Address(top_addr));
1283 #endif // AARCH64
1284 
1285   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1286   add_rc(obj_end, obj, size_expression);
1287   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1288   cmp(obj_end, obj);
1289   b(slow_case, lo);
1290   // Update heap_top if allocation succeeded
1291   cmp(obj_end, heap_end);
1292   b(slow_case, hi);
1293 
1294 #ifdef AARCH64
1295   stxr(heap_end/*scratched*/, obj_end, top_addr);
1296   cbnz_w(heap_end, retry);
1297 #else
1298   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1299   b(retry, ne);
1300 #endif // AARCH64
1301 }
1302 
1303 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1304 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1305                                  RegisterOrConstant size_expression, Label& slow_case) {
1306   const Register tlab_end = tmp1;
1307   assert_different_registers(obj, obj_end, tlab_end);
1308 
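       // The TLAB is thread-local, so plain loads and stores of tlab_top are sufficient;
       // no atomic update is needed here.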
1309   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1310   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1311   add_rc(obj_end, obj, size_expression);
1312   cmp(obj_end, tlab_end);
1313   b(slow_case, hi);
1314   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1315 }
1316 
1317 // Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
1318 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1319   Label loop;
1320   const Register ptr = start;
1321 
1322 #ifdef AARCH64
1323   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1324   const Register size = tmp;
1325   Label remaining, done;
1326 
1327   sub(size, end, start);
1328 
1329 #ifdef ASSERT
1330   { Label L;
1331     tst(size, wordSize - 1);
1332     b(L, eq);
1333     stop("size is not a multiple of wordSize");
1334     bind(L);
1335   }
1336 #endif // ASSERT
1337 
1338   subs(size, size, wordSize);
1339   b(remaining, le);
1340 
1341   // Zero by 2 words per iteration.
1342   bind(loop);
1343   subs(size, size, 2*wordSize);
1344   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1345   b(loop, gt);
1346 
1347   bind(remaining);
1348   b(done, ne);
1349   str(ZR, Address(ptr));
1350   bind(done);
1351 #else
1352   mov(tmp, 0);
1353   bind(loop);
1354   cmp(ptr, end);
1355   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1356   b(loop, lo);
1357 #endif // AARCH64
1358 }
1359 
1360 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1361 #ifdef AARCH64
1362   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1363   add_rc(tmp, tmp, size_in_bytes);
1364   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1365 #else
1366   // Bump total bytes allocated by this thread
1367   Label done;
1368 
1369   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1370   adds(tmp, tmp, size_in_bytes);
1371   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1372   b(done, cc);
1373 
1374   // Increment the high word and store the result single-copy atomically (an unlikely scenario on typical embedded systems, as it means more than 4GB has been allocated).
1375   // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a pair could be
1376   // difficult to reserve at a higher level, so the routine is ready to allocate one itself.
1377   Register low, high;
1378   // Select either R0/R1 or R2/R3
1379 
1380   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1381     low = R2;
1382     high  = R3;
1383   } else {
1384     low = R0;
1385     high  = R1;
1386   }
1387   push(RegisterSet(low, high));
1388 
1389   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1390   adds(low, low, size_in_bytes);
1391   adc(high, high, 0);
1392   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1393 
1394   pop(RegisterSet(low, high));
1395 
1396   bind(done);
1397 #endif // AARCH64
1398 }
1399 
1400 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1401   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1402   if (UseStackBanging) {
1403     const int page_size = os::vm_page_size();
1404 
1405     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1406     strb(R0, Address(tmp));
1407 #ifdef AARCH64
1408     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1409       sub(tmp, tmp, page_size);
1410       strb(R0, Address(tmp));
1411     }
1412 #else
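         // The 0xff0 stride is slightly smaller than the typical 4K page size, so no page in the
         // banged range is skipped, while the offset still fits in the 12-bit immediate field of
         // strb (0x1000 would not).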
1413     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1414       strb(R0, Address(tmp, -0xff0, pre_indexed));
1415     }
1416 #endif // AARCH64
1417   }
1418 }
1419 
1420 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1421   if (UseStackBanging) {
1422     Label loop;
1423 
1424     mov(tmp, SP);
1425     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1426 #ifdef AARCH64
1427     sub(tmp, tmp, Rsize);
1428     bind(loop);
1429     subs(Rsize, Rsize, os::vm_page_size());
1430     strb(ZR, Address(tmp, Rsize));
1431 #else
1432     bind(loop);
1433     subs(Rsize, Rsize, 0xff0);
1434     strb(R0, Address(tmp, -0xff0, pre_indexed));
1435 #endif // AARCH64
1436     b(loop, hi);
1437   }
1438 }
1439 
1440 void MacroAssembler::stop(const char* msg) {
1441   // This code pattern is matched in NativeInstruction::is_stop.
1442   // Update it when this code is modified.
1443 #ifdef COMPILER1
1444   if (CommentedAssembly) {
1445     block_comment("stop");
1446   }
1447 #endif
1448 
1449   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1450   InlinedString Lmsg(msg);
1451 
1452   // save all registers for further inspection
1453   save_all_registers();
1454 
1455   ldr_literal(R0, Lmsg);                     // message
1456   mov(R1, SP);                               // register save area
1457 
1458 #ifdef AARCH64
1459   ldr_literal(Rtemp, Ldebug);
1460   br(Rtemp);
1461 #else
1462   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1463 #endif // AARCH64
1464 
1465 #if defined(COMPILER2) && defined(AARCH64)
1466   int off = offset();
1467 #endif
1468   bind_literal(Lmsg);
1469   bind_literal(Ldebug);
1470 #if defined(COMPILER2) && defined(AARCH64)
1471   if (offset() - off == 2 * wordSize) {
1472     // no padding, so insert nop for worst-case sizing
1473     nop();
1474   }
1475 #endif
1476 }
1477 
1478 void MacroAssembler::warn(const char* msg) {
1479 #ifdef COMPILER1
1480   if (CommentedAssembly) {
1481     block_comment("warn");
1482   }
1483 #endif
1484 
1485   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1486   InlinedString Lmsg(msg);
1487   Label done;
1488 
1489   int push_size = save_caller_save_registers();
1490 
1491 #ifdef AARCH64
1492   // TODO-AARCH64 - get rid of extra debug parameters
1493   mov(R1, LR);
1494   mov(R2, FP);
1495   add(R3, SP, push_size);
1496 #endif
1497 
1498   ldr_literal(R0, Lmsg);                    // message
1499   ldr_literal(LR, Lwarn);                   // call warning
1500 
1501   call(LR);
1502 
1503   restore_caller_save_registers();
1504 
1505   b(done);
1506   bind_literal(Lmsg);
1507   bind_literal(Lwarn);
1508   bind(done);
1509 }
1510 
1511 
1512 int MacroAssembler::save_all_registers() {
1513   // This code pattern is matched in NativeInstruction::is_save_all_registers.
1514   // Update it when this code is modified.
1515 #ifdef AARCH64
1516   const Register tmp = Rtemp;
1517   raw_push(R30, ZR);
1518   for (int i = 28; i >= 0; i -= 2) {
1519       raw_push(as_Register(i), as_Register(i+1));
1520   }
1521   mov_pc_to(tmp);
1522   str(tmp, Address(SP, 31*wordSize));
1523   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1524   return 32*wordSize;
1525 #else
1526   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1527   return 15*wordSize;
1528 #endif // AARCH64
1529 }
1530 
1531 void MacroAssembler::restore_all_registers() {
1532 #ifdef AARCH64
1533   for (int i = 0; i <= 28; i += 2) {
1534     raw_pop(as_Register(i), as_Register(i+1));
1535   }
1536   raw_pop(R30, ZR);
1537 #else
1538   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1539   add(SP, SP, wordSize);                         // discard saved PC
1540 #endif // AARCH64
1541 }
1542 
1543 int MacroAssembler::save_caller_save_registers() {
1544 #ifdef AARCH64
1545   for (int i = 0; i <= 16; i += 2) {
1546     raw_push(as_Register(i), as_Register(i+1));
1547   }
1548   raw_push(R18, LR);
1549   return 20*wordSize;
1550 #else
1551 #if R9_IS_SCRATCHED
1552   // Save also R10 to preserve alignment
1553   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1554   return 8*wordSize;
1555 #else
1556   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1557   return 6*wordSize;
1558 #endif
1559 #endif // AARCH64
1560 }
1561 
1562 void MacroAssembler::restore_caller_save_registers() {
1563 #ifdef AARCH64
1564   raw_pop(R18, LR);
1565   for (int i = 16; i >= 0; i -= 2) {
1566     raw_pop(as_Register(i), as_Register(i+1));
1567   }
1568 #else
1569 #if R9_IS_SCRATCHED
1570   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1571 #else
1572   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1573 #endif
1574 #endif // AARCH64
1575 }
1576 
1577 void MacroAssembler::debug(const char* msg, const intx* registers) {
1578   // In order to get locks to work, we need to fake a in_VM state
1579   JavaThread* thread = JavaThread::current();
1580   thread->set_thread_state(_thread_in_vm);
1581 
1582   if (ShowMessageBoxOnError) {
1583     ttyLocker ttyl;
1584     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1585       BytecodeCounter::print();
1586     }
1587     if (os::message_box(msg, "Execution stopped, print registers?")) {
1588 #ifdef AARCH64
1589       // saved registers: R0-R30, PC
1590       const int nregs = 32;
1591 #else
1592       // saved registers: R0-R12, LR, PC
1593       const int nregs = 15;
1594       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1595 #endif // AARCH64
1596 
1597       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1598         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1599       }
1600 
1601 #ifdef AARCH64
1602       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1603 #endif // AARCH64
1604 
1605       // derive original SP value from the address of register save area
1606       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1607     }
1608     BREAKPOINT;
1609   } else {
1610     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1611   }
1612   assert(false, "DEBUG MESSAGE: %s", msg);
1613   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1614 }
1615 
1616 void MacroAssembler::unimplemented(const char* what) {
1617   const char* buf = NULL;
1618   {
1619     ResourceMark rm;
1620     stringStream ss;
1621     ss.print("unimplemented: %s", what);
1622     buf = code_string(ss.as_string());
1623   }
1624   stop(buf);
1625 }
1626 
1627 
1628 // Implementation of FixedSizeCodeBlock
1629 
1630 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1631   _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1632 }
1633 
1634 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1635   if (_enabled) {
1636     address curr_pc = _masm->pc();
1637 
1638     assert(_start < curr_pc, "invalid current pc");
1639     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1640 
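         // Pad with nops so the block occupies exactly _size_in_instrs instructions.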
1641     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1642     for (int i = 0; i < nops_count; i++) {
1643       _masm->nop();
1644     }
1645   }
1646 }
1647 
1648 #ifdef AARCH64
1649 
1650 // Serializes memory.
1651 // tmp register is not used on AArch64; this parameter is provided solely for compatibility with 32-bit ARM
1652 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1653   if (!os::is_MP()) return;
1654 
1655   // TODO-AARCH64 investigate dsb vs dmb effects
1656   if (order_constraint == StoreStore) {
1657     dmb(DMB_st);
1658   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1659     dmb(DMB_ld);
1660   } else {
1661     dmb(DMB_all);
1662   }
1663 }
1664 
1665 #else
1666 
1667 // Serializes memory. Potentially blows flags and reg.
1668 // tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
1669 // preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
1670 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
1671 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1672                             Register tmp,
1673                             bool preserve_flags,
1674                             Register load_tgt) {
1675   if (!os::is_MP()) return;
1676 
1677   if (order_constraint == StoreStore) {
1678     dmb(DMB_st, tmp);
1679   } else if ((order_constraint & StoreLoad)  ||
1680              (order_constraint & LoadLoad)   ||
1681              (order_constraint & StoreStore) ||
1682              (load_tgt == noreg)             ||
1683              preserve_flags) {
1684     dmb(DMB_all, tmp);
1685   } else {
1686     // LoadStore: reordering of speculative stores is prohibited
1687 
1688     // By providing an ordered load target register, we avoid an extra memory load reference
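         // The self-compare plus the never-taken conditional branch creates a control
         // dependency on the loaded value; on ARM this is enough to order the load
         // before subsequent stores.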
1689     Label not_taken;
1690     bind(not_taken);
1691     cmp(load_tgt, load_tgt);
1692     b(not_taken, ne);
1693   }
1694 }
1695 
1696 #endif // AARCH64
1697 
1698 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1699 // on failure, so fall-through can only mean success.
1700 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1701 // This is only needed for C2, which for some reason does not retry,
1702 // while C1/interpreter does.
1703 // TODO: measure if it makes a difference
1704 
1705 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1706   Register base, Register tmp, Label &slow_case,
1707   bool allow_fallthrough_on_failure, bool one_shot)
1708 {
1709 
1710   bool fallthrough_is_success = false;
1711 
1712   // ARM Litmus Test example does prefetching here.
1713   // TODO: investigate if it helps performance
1714 
1715   // The last store was to the displaced header, so to prevent
1716   // reordering we must issue a StoreStore or Release barrier before
1717   // the CAS store.
1718 
1719 #ifdef AARCH64
1720 
1721   Register Rscratch = tmp;
1722   Register Roop = base;
1723   Register mark = oldval;
1724   Register Rbox = newval;
1725   Label loop;
1726 
1727   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1728 
1729   // Instead of StoreStore here, we use store-release-exclusive below
1730 
1731   bind(loop);
1732 
1733   ldaxr(tmp, base);  // acquire
1734   cmp(tmp, oldval);
1735   b(slow_case, ne);
1736   stlxr(tmp, newval, base); // release
1737   if (one_shot) {
1738     cmp_w(tmp, 0);
1739   } else {
1740     cbnz_w(tmp, loop);
1741     fallthrough_is_success = true;
1742   }
1743 
1744   // MemBarAcquireLock would normally go here, but
1745   // we already do ldaxr+stlxr above, which has
1746   // Sequential Consistency
1747 
1748 #else
1749   membar(MacroAssembler::StoreStore, noreg);
1750 
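       // one_shot: a single ldrex/strex attempt. The strex and the final cmp are
       // executed only when the initial compare succeeded (eq), so 'ne' on
       // fall-through indicates failure.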
1751   if (one_shot) {
1752     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1753     cmp(tmp, oldval);
1754     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1755     cmp(tmp, 0, eq);
1756   } else {
1757     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1758   }
1759 
1760   // MemBarAcquireLock barrier
1761   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1762   // but that doesn't prevent a load or store from floating up between
1763   // the load and store in the CAS sequence, so play it safe and
1764   // do a full fence.
1765   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1766 #endif
1767   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1768     b(slow_case, ne);
1769   }
1770 }
1771 
1772 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1773   Register base, Register tmp, Label &slow_case,
1774   bool allow_fallthrough_on_failure, bool one_shot)
1775 {
1776 
1777   bool fallthrough_is_success = false;
1778 
1779   assert_different_registers(oldval,newval,base,tmp);
1780 
1781 #ifdef AARCH64
1782   Label loop;
1783 
1784   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1785 
1786   bind(loop);
1787   ldxr(tmp, base);
1788   cmp(tmp, oldval);
1789   b(slow_case, ne);
1790   // MemBarReleaseLock barrier
1791   stlxr(tmp, newval, base);
1792   if (one_shot) {
1793     cmp_w(tmp, 0);
1794   } else {
1795     cbnz_w(tmp, loop);
1796     fallthrough_is_success = true;
1797   }
1798 #else
1799   // MemBarReleaseLock barrier
1800   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1801   // but that doesn't prevent a load or store from floating down between
1802   // the load and store in the CAS sequence, so play it safe and
1803   // do a full fence.
1804   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1805 
1806   if (one_shot) {
1807     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1808     cmp(tmp, oldval);
1809     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1810     cmp(tmp, 0, eq);
1811   } else {
1812     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1813   }
1814 #endif
1815   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1816     b(slow_case, ne);
1817   }
1818 
1819   // ExitEnter
1820   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1821   // barrier that follows volatile store.
1822   // TODO: Should be able to remove on armv8 if volatile loads
1823   // use the load-acquire instruction.
1824   membar(StoreLoad, noreg);
1825 }
1826 
1827 #ifndef PRODUCT
1828 
1829 // Preserves flags and all registers.
1830 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1831 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1832   if (counter_addr != NULL) {
1833     InlinedAddress counter_addr_literal((address)counter_addr);
1834     Label done, retry;
1835     if (cond != al) {
1836       b(done, inverse(cond));
1837     }
1838 
1839 #ifdef AARCH64
1840     raw_push(R0, R1);
1841     raw_push(R2, ZR);
1842 
1843     ldr_literal(R0, counter_addr_literal);
1844 
1845     bind(retry);
1846     ldxr_w(R1, R0);
1847     add_w(R1, R1, 1);
1848     stxr_w(R2, R1, R0);
1849     cbnz_w(R2, retry);
1850 
1851     raw_pop(R2, ZR);
1852     raw_pop(R0, R1);
1853 #else
1854     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1855     ldr_literal(R0, counter_addr_literal);
1856 
1857     mrs(CPSR, Rtemp);
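         // The CAS loop below clobbers the condition flags; save them in Rtemp here
         // and restore them with msr after the loop.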
1858 
1859     bind(retry);
1860     ldr_s32(R1, Address(R0));
1861     add(R2, R1, 1);
1862     atomic_cas_bool(R1, R2, R0, 0, R3);
1863     b(retry, ne);
1864 
1865     msr(CPSR_fsxc, Rtemp);
1866 
1867     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1868 #endif // AARCH64
1869 
1870     b(done);
1871     bind_literal(counter_addr_literal);
1872 
1873     bind(done);
1874   }
1875 }
1876 
1877 #endif // !PRODUCT
1878 
1879 
1880 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1881 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1882 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1883                                                  Register tmp, Label& slow_case, int* counter_addr) {
1884 
1885   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1886 #ifdef ASSERT
1887   breakpoint(ne); // Fallthrough only on success
1888 #endif
1889 #ifndef PRODUCT
1890   if (counter_addr != NULL) {
1891     cond_atomic_inc32(al, counter_addr);
1892   }
1893 #endif // !PRODUCT
1894 }
1895 
1896 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1897                                          bool swap_reg_contains_mark,
1898                                          Register tmp2,
1899                                          Label& done, Label& slow_case,
1900                                          BiasedLockingCounters* counters) {
1901   // obj_reg must be preserved (at least) if the bias locking fails
1902   // tmp_reg is a temporary register
1903   // swap_reg was used as a temporary but contained a value
1904   //   that was used afterwards in some call paths. Callers
1905   //   have been fixed so that swap_reg no longer needs to be
1906   //   saved.
1907   // Rtemp is no longer scratched
1908 
1909   assert(UseBiasedLocking, "why call this otherwise?");
1910   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1911   guarantee(swap_reg!=tmp_reg, "invariant");
1912   assert(tmp_reg != noreg, "must supply tmp_reg");
1913 
1914 #ifndef PRODUCT
1915   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1916     counters = BiasedLocking::counters();
1917   }
1918 #endif
1919 
1920   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1921   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1922 
1923   // Biased locking
1924   // See whether the lock is currently biased toward our thread and
1925   // whether the epoch is still valid
1926   // Note that the runtime guarantees sufficient alignment of JavaThread
1927   // pointers to allow age to be placed into low bits
1928   // First check to see whether biasing is even enabled for this object
1929   Label cas_label;
1930 
1931   // The null check applies to the mark loading, if we need to load it.
1932   // If the mark has already been loaded in swap_reg then it has already
1933   // been performed and the offset is irrelevant.
1934   int null_check_offset = offset();
1935   if (!swap_reg_contains_mark) {
1936     ldr(swap_reg, mark_addr);
1937   }
1938 
1939   // On MP platforms, loads could return 'stale' values in some cases.
1940   // That is acceptable since either CAS or slow case path is taken in the worst case.
1941 
1942   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1943   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1944 
1945   b(cas_label, ne);
1946 
1947   // The bias pattern is present in the object's header. Need to check
1948   // whether the bias owner and the epoch are both still current.
1949   load_klass(tmp_reg, obj_reg);
1950   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1951   orr(tmp_reg, tmp_reg, Rthread);
1952   eor(tmp_reg, tmp_reg, swap_reg);
1953 
1954 #ifdef AARCH64
1955   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1956 #else
1957   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1958 #endif // AARCH64
1959 
1960 #ifndef PRODUCT
1961   if (counters != NULL) {
1962     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1963   }
1964 #endif // !PRODUCT
1965 
1966   b(done, eq);
1967 
1968   Label try_revoke_bias;
1969   Label try_rebias;
1970 
1971   // At this point we know that the header has the bias pattern and
1972   // that we are not the bias owner in the current epoch. We need to
1973   // figure out more details about the state of the header in order to
1974   // know what operations can be legally performed on the object's
1975   // header.
1976 
1977   // If the low three bits in the xor result aren't clear, that means
1978   // the prototype header is no longer biased and we have to revoke
1979   // the bias on this object.
1980   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1981   b(try_revoke_bias, ne);
1982 
1983   // Biasing is still enabled for this data type. See whether the
1984   // epoch of the current bias is still valid, meaning that the epoch
1985   // bits of the mark word are equal to the epoch bits of the
1986   // prototype header. (Note that the prototype header's epoch bits
1987   // only change at a safepoint.) If not, attempt to rebias the object
1988   // toward the current thread. Note that we must be absolutely sure
1989   // that the current epoch is invalid in order to do this because
1990   // otherwise the manipulations it performs on the mark word are
1991   // illegal.
1992   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1993   b(try_rebias, ne);
1994 
1995   // tmp_reg has the age, epoch and pattern bits cleared
1996   // The remaining (owner) bits are (Thread ^ current_owner)
1997 
1998   // The epoch of the current bias is still valid but we know nothing
1999   // about the owner; it might be set or it might be clear. Try to
2000   // acquire the bias of the object using an atomic operation. If this
2001   // fails we will go in to the runtime to revoke the object's bias.
2002   // Note that we first construct the presumed unbiased header so we
2003   // don't accidentally blow away another thread's valid bias.
2004 
2005   // Note that we know the owner is not ourself. Hence, success can
2006   // only happen when the owner bits is 0
2007 
2008 #ifdef AARCH64
2009   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2010   // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
2011   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2012   andr(swap_reg, swap_reg, tmp2);
2013 #else
2014   // until the assembler can be made smarter, we need to make some assumptions about the values
2015   // so we can optimize this:
2016   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2017 
2018   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2019   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
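       // lsl 23 then lsr 23 keeps only the low 9 bits (0x1ff = biased_lock + age + epoch),
       // clearing the thread (owner) bits.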
2020 #endif // AARCH64
2021 
2022   orr(tmp_reg, swap_reg, Rthread); // new mark
2023 
2024   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2025         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2026 
2027   // If the biasing toward our thread failed, this means that
2028   // another thread succeeded in biasing it toward itself and we
2029   // need to revoke that bias. The revocation will occur in the
2030   // interpreter runtime in the slow case.
2031 
2032   b(done);
2033 
2034   bind(try_rebias);
2035 
2036   // At this point we know the epoch has expired, meaning that the
2037   // current "bias owner", if any, is actually invalid. Under these
2038   // circumstances _only_, we are allowed to use the current header's
2039   // value as the comparison value when doing the cas to acquire the
2040   // bias in the current epoch. In other words, we allow transfer of
2041   // the bias from one thread to another directly in this situation.
2042 
2043   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2044 
2045   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2046 
2047   // owner bits 'random'. Set them to Rthread.
2048 #ifdef AARCH64
2049   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2050   andr(tmp_reg, tmp_reg, tmp2);
2051 #else
2052   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2053   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2054 #endif // AARCH64
2055 
2056   orr(tmp_reg, tmp_reg, Rthread); // new mark
2057 
2058   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2059         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2060 
2061   // If the biasing toward our thread failed, then another thread
2062   // succeeded in biasing it toward itself and we need to revoke that
2063   // bias. The revocation will occur in the runtime in the slow case.
2064 
2065   b(done);
2066 
2067   bind(try_revoke_bias);
2068 
2069   // The prototype mark in the klass doesn't have the bias bit set any
2070   // more, indicating that objects of this data type are not supposed
2071   // to be biased any more. We are going to try to reset the mark of
2072   // this object to the prototype value and fall through to the
2073   // CAS-based locking scheme. Note that if our CAS fails, it means
2074   // that another thread raced us for the privilege of revoking the
2075   // bias of this particular object, so it's okay to continue in the
2076   // normal locking code.
2077 
2078   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2079 
2080   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2081 
2082   // owner bits 'random'. Clear them
2083 #ifdef AARCH64
2084   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2085   andr(tmp_reg, tmp_reg, tmp2);
2086 #else
2087   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2088   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2089 #endif // AARCH64
2090 
2091   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2092         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2093 
2094   // Fall through to the normal CAS-based lock, because no matter what
2095   // the result of the above CAS, some thread must have succeeded in
2096   // removing the bias bit from the object's header.
2097 
2098   bind(cas_label);
2099 
2100   return null_check_offset;
2101 }
2102 
2103 
2104 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2105   assert(UseBiasedLocking, "why call this otherwise?");
2106 
2107   // Check for biased locking unlock case, which is a no-op
2108   // Note: we do not have to check the thread ID for two reasons.
2109   // First, the interpreter checks for IllegalMonitorStateException at
2110   // a higher level. Second, if the bias was revoked while we held the
2111   // lock, the object could not be rebiased toward another thread, so
2112   // the bias bit would be clear.
2113   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2114 
2115   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2116   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2117   b(done, eq);
2118 }
2119 
2120 
2121 void MacroAssembler::resolve_jobject(Register value,
2122                                      Register tmp1,
2123                                      Register tmp2) {
2124   assert_different_registers(value, tmp1, tmp2);
2125   Label done, not_weak;
2126   cbz(value, done);             // Use NULL as-is.
2127   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2128   tbz(value, 0, not_weak);      // Test for jweak tag.
2129 
2130   // Resolve jweak.
2131   access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF,
2132                  Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
2133   b(done);
2134   bind(not_weak);
2135   // Resolve (untagged) jobject.
2136   access_load_at(T_OBJECT, IN_ROOT | IN_CONCURRENT_ROOT,
2137                  Address(value, 0), value, tmp1, tmp2, noreg);
2138   verify_oop(value);
2139   bind(done);
2140 }
2141 
2142 
2143 //////////////////////////////////////////////////////////////////////////////////
2144 
2145 #ifdef AARCH64
2146 
2147 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2148   switch (size_in_bytes) {
2149     case  8: ldr(dst, src); break;
2150     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2151     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2152     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2153     default: ShouldNotReachHere();
2154   }
2155 }
2156 
2157 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2158   switch (size_in_bytes) {
2159     case  8: str(src, dst);    break;
2160     case  4: str_32(src, dst); break;
2161     case  2: strh(src, dst);   break;
2162     case  1: strb(src, dst);   break;
2163     default: ShouldNotReachHere();
2164   }
2165 }
2166 
2167 #else
2168 
2169 void MacroAssembler::load_sized_value(Register dst, Address src,
2170                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2171   switch (size_in_bytes) {
2172     case  4: ldr(dst, src, cond); break;
2173     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2174     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2175     default: ShouldNotReachHere();
2176   }
2177 }
2178 
2179 
2180 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2181   switch (size_in_bytes) {
2182     case  4: str(src, dst, cond); break;
2183     case  2: strh(src, dst, cond);   break;
2184     case  1: strb(src, dst, cond);   break;
2185     default: ShouldNotReachHere();
2186   }
2187 }
2188 #endif // AARCH64
2189 
2190 // Look up the method for a megamorphic invokeinterface call.
2191 // The target method is determined by <Rinterf, Rindex>.
2192 // The receiver klass is in Rklass.
2193 // On success, the result will be in method_result, and execution falls through.
2194 // On failure, execution transfers to the given label.
2195 void MacroAssembler::lookup_interface_method(Register Rklass,
2196                                              Register Rintf,
2197                                              RegisterOrConstant itable_index,
2198                                              Register method_result,
2199                                              Register Rscan,
2200                                              Register Rtmp,
2201                                              Label& L_no_such_interface) {
2202 
2203   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2204 
2205   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2206   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2207 
2208   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2209   const int base = in_bytes(Klass::vtable_start_offset());
2210   const int scale = exact_log2(vtableEntry::size_in_bytes());
2211   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2212   add(Rscan, Rklass, base);
2213   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2214 
2215   // Search through the itable for an interface equal to incoming Rintf
2216   // itable looks like [intface][offset][intface][offset][intface][offset]
2217 
2218   Label loop;
2219   bind(loop);
2220   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2221 #ifdef AARCH64
2222   Label found;
2223   cmp(Rtmp, Rintf);
2224   b(found, eq);
2225   cbnz(Rtmp, loop);
2226 #else
2227   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2228   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2229   b(loop, ne);
2230 #endif // AARCH64
2231 
2232 #ifdef AARCH64
2233   b(L_no_such_interface);
2234   bind(found);
2235 #else
2236   // CF == 0 means we reached the end of itable without finding icklass
2237   b(L_no_such_interface, cc);
2238 #endif // AARCH64
2239 
2240   if (method_result != noreg) {
2241     // Interface found at previous position of Rscan, now load the method
2242     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2243     if (itable_index.is_register()) {
2244       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2245       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2246       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2247       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2248     } else {
2249       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2250                           itableMethodEntry::method_offset_in_bytes();
2251       add_slow(method_result, Rklass, method_offset);
2252       ldr(method_result, Address(method_result, Rtmp));
2253     }
2254   }
2255 }
2256 
2257 #ifdef COMPILER2
2258 // TODO: 8 bytes at a time? pre-fetch?
2259 // Compare char[] arrays aligned to 4 bytes.
2260 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2261                                         Register limit, Register result,
2262                                         Register chr1, Register chr2, Label& Ldone) {
2263   Label Lvector, Lloop;
2264 
2265   // Note: limit contains number of bytes (2*char_elements) != 0.
2266   tst(limit, 0x2); // trailing character ?
2267   b(Lvector, eq);
2268 
2269   // compare the trailing char
2270   sub(limit, limit, sizeof(jchar));
2271   ldrh(chr1, Address(ary1, limit));
2272   ldrh(chr2, Address(ary2, limit));
2273   cmp(chr1, chr2);
2274   mov(result, 0, ne);     // not equal
2275   b(Ldone, ne);
2276 
2277   // only one char ?
2278   tst(limit, limit);
2279   mov(result, 1, eq);
2280   b(Ldone, eq);
2281 
2282   // word by word compare, don't need alignment check
2283   bind(Lvector);
2284 
2285   // Shift ary1 and ary2 to the end of the arrays, negate limit
2286   add(ary1, limit, ary1);
2287   add(ary2, limit, ary2);
2288   neg(limit, limit);
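       // limit is now negative: each iteration loads one 32-bit word (two chars)
       // at [ary + limit] and steps limit toward zero.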
2289 
2290   bind(Lloop);
2291   ldr_u32(chr1, Address(ary1, limit));
2292   ldr_u32(chr2, Address(ary2, limit));
2293   cmp_32(chr1, chr2);
2294   mov(result, 0, ne);     // not equal
2295   b(Ldone, ne);
2296   adds(limit, limit, 2*sizeof(jchar));
2297   b(Lloop, ne);
2298 
2299   // Caller should set it:
2300   // mov(result_reg, 1);  //equal
2301 }
2302 #endif
2303 
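     // Bumps the 32-bit counter at counter_addr using a plain load/add/store;
     // the update is not atomic.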
2304 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2305   mov_slow(tmpreg1, counter_addr);
2306   ldr_s32(tmpreg2, tmpreg1);
2307   add_32(tmpreg2, tmpreg2, 1);
2308   str_32(tmpreg2, tmpreg1);
2309 }
2310 
2311 void MacroAssembler::floating_cmp(Register dst) {
2312 #ifdef AARCH64
2313   NOT_TESTED();
2314   cset(dst, gt);            // 1 if '>', else 0
2315   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2316 #else
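       // Fold the floating-point compare result into -1 / 0 / +1 in dst,
       // matching the AArch64 path above.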
2317   vmrs(dst, FPSCR);
2318   orr(dst, dst, 0x08000000);
2319   eor(dst, dst, AsmOperand(dst, lsl, 3));
2320   mov(dst, AsmOperand(dst, asr, 30));
2321 #endif
2322 }
2323 
2324 void MacroAssembler::restore_default_fp_mode() {
2325 #ifdef AARCH64
2326   msr(SysReg_FPCR, ZR);
2327 #else
2328 #ifndef __SOFTFP__
2329   // Round to Near mode, IEEE compatible, masked exceptions
2330   mov(Rtemp, 0);
2331   vmsr(FPSCR, Rtemp);
2332 #endif // !__SOFTFP__
2333 #endif // AARCH64
2334 }
2335 
2336 #ifndef AARCH64
2337 // 24-bit word range == 26-bit byte range
2338 bool check26(int offset) {
2339   // this could be simplified, but it mimics encoding and decoding
2340   // an actual branch instruction
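       // '<< 6 >> 8' drops the low two bits (byte -> word offset) and sign-extends
       // from bit 25, the same way the branch encoding does.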
2341   int off1 = offset << 6 >> 8;
2342   int encoded = off1 & ((1<<24)-1);
2343   int decoded = encoded << 8 >> 6;
2344   return offset == decoded;
2345 }
2346 #endif // !AARCH64
2347 
2348 // Perform some slight adjustments so the default 32MB code cache
2349 // is fully reachable.
2350 static inline address first_cache_address() {
2351   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2352 }
2353 static inline address last_cache_address() {
2354   return CodeCache::high_bound() - Assembler::InstructionSize;
2355 }
2356 
2357 #ifdef AARCH64
2358 // Can we reach target using ADRP?
2359 bool MacroAssembler::page_reachable_from_cache(address target) {
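       // ADRP provides a signed 21-bit page offset (4 KB pages), i.e. roughly +/- 4 GB
       // from the current page.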
2360   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2361   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2362   intptr_t addr = (intptr_t)target & ~0xfff;
2363 
2364   intptr_t loffset = addr - cl;
2365   intptr_t hoffset = addr - ch;
2366   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2367 }
2368 #endif
2369 
2370 // Can we reach target using unconditional branch or call from anywhere
2371 // in the code cache (because code can be relocated)?
2372 bool MacroAssembler::_reachable_from_cache(address target) {
2373 #ifdef __thumb__
2374   if ((1 & (intptr_t)target) != 0) {
2375     // Return false to avoid 'b' if we need switching to THUMB mode.
2376     return false;
2377   }
2378 #endif
2379 
2380   address cl = first_cache_address();
2381   address ch = last_cache_address();
2382 
2383   if (ForceUnreachable) {
2384     // Only addresses from CodeCache can be treated as reachable.
2385     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2386       return false;
2387     }
2388   }
2389 
2390   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2391   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2392 
2393 #ifdef AARCH64
2394   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2395 #else
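       // -8 accounts for the ARM PC offset: a branch offset is relative to PC,
       // which reads as the branch address + 8.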
2396   return check26(loffset - 8) && check26(hoffset - 8);
2397 #endif
2398 }
2399 
2400 bool MacroAssembler::reachable_from_cache(address target) {
2401   assert(CodeCache::contains(pc()), "not supported");
2402   return _reachable_from_cache(target);
2403 }
2404 
2405 // Can we reach the entire code cache from anywhere else in the code cache?
2406 bool MacroAssembler::_cache_fully_reachable() {
2407   address cl = first_cache_address();
2408   address ch = last_cache_address();
2409   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2410 }
2411 
2412 bool MacroAssembler::cache_fully_reachable() {
2413   assert(CodeCache::contains(pc()), "not supported");
2414   return _cache_fully_reachable();
2415 }
2416 
2417 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2418   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2419   if (reachable_from_cache(target)) {
2420     relocate(rtype);
2421     b(target NOT_AARCH64_ARG(cond));
2422     return;
2423   }
2424 
2425   // Note: relocate is not needed for the code below,
2426   // encoding targets in absolute format.
2427   if (ignore_non_patchable_relocations()) {
2428     rtype = relocInfo::none;
2429   }
2430 
2431 #ifdef AARCH64
2432   assert (scratch != noreg, "should be specified");
2433   InlinedAddress address_literal(target, rtype);
2434   ldr_literal(scratch, address_literal);
2435   br(scratch);
2436   int off = offset();
2437   bind_literal(address_literal);
2438 #ifdef COMPILER2
2439   if (offset() - off == wordSize) {
2440     // no padding, so insert nop for worst-case sizing
2441     nop();
2442   }
2443 #endif
2444 #else
2445   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2446     // Note: this version cannot be (atomically) patched
2447     mov_slow(scratch, (intptr_t)target, cond);
2448     bx(scratch, cond);
2449   } else {
2450     Label skip;
2451     InlinedAddress address_literal(target);
2452     if (cond != al) {
2453       b(skip, inverse(cond));
2454     }
2455     relocate(rtype);
2456     ldr_literal(PC, address_literal);
2457     bind_literal(address_literal);
2458     bind(skip);
2459   }
2460 #endif // AARCH64
2461 }
2462 
2463 // Similar to jump except that:
2464 // - near calls are valid only if any destination in the cache is near
2465 // - no movt/movw (not atomically patchable)
2466 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2467   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2468   if (cache_fully_reachable()) {
2469     // Note: this assumes that all possible targets (the initial one
2470     // and the addresses patched to) are all in the code cache.
2471     assert(CodeCache::contains(target), "target might be too far");
2472     relocate(rtype);
2473     b(target NOT_AARCH64_ARG(cond));
2474     return;
2475   }
2476 
2477   // Discard the relocation information if not needed for CacheCompiledCode
2478   // since the next encodings are all in absolute format.
2479   if (ignore_non_patchable_relocations()) {
2480     rtype = relocInfo::none;
2481   }
2482 
2483 #ifdef AARCH64
2484   assert (scratch != noreg, "should be specified");
2485   InlinedAddress address_literal(target);
2486   relocate(rtype);
2487   ldr_literal(scratch, address_literal);
2488   br(scratch);
2489   int off = offset();
2490   bind_literal(address_literal);
2491 #ifdef COMPILER2
2492   if (offset() - off == wordSize) {
2493     // no padding, so insert nop for worst-case sizing
2494     nop();
2495   }
2496 #endif
2497 #else
2498   {
2499     Label skip;
2500     InlinedAddress address_literal(target);
2501     if (cond != al) {
2502       b(skip, inverse(cond));
2503     }
2504     relocate(rtype);
2505     ldr_literal(PC, address_literal);
2506     bind_literal(address_literal);
2507     bind(skip);
2508   }
2509 #endif // AARCH64
2510 }
2511 
2512 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2513   Register scratch = LR;
2514   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2515   if (reachable_from_cache(target)) {
2516     relocate(rspec);
2517     bl(target NOT_AARCH64_ARG(cond));
2518     return;
2519   }
2520 
2521   // Note: relocate is not needed for the code below,
2522   // encoding targets in absolute format.
2523   if (ignore_non_patchable_relocations()) {
2524     // This assumes the information was needed only for relocating the code.
2525     rspec = RelocationHolder::none;
2526   }
2527 
2528 #ifndef AARCH64
2529   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2530     // Note: this version cannot be (atomically) patched
2531     mov_slow(scratch, (intptr_t)target, cond);
2532     blx(scratch, cond);
2533     return;
2534   }
2535 #endif
2536 
2537   {
2538     Label ret_addr;
2539 #ifndef AARCH64
2540     if (cond != al) {
2541       b(ret_addr, inverse(cond));
2542     }
2543 #endif
2544 
2545 
2546 #ifdef AARCH64
2547     // TODO-AARCH64: make more optimal implementation
2548     // [ Keep in sync with MacroAssembler::call_size ]
2549     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2550     mov_slow(scratch, target);
2551     blr(scratch);
2552 #else
2553     InlinedAddress address_literal(target);
2554     relocate(rspec);
2555     adr(LR, ret_addr);
2556     ldr_literal(PC, address_literal);
2557 
2558     bind_literal(address_literal);
2559     bind(ret_addr);
2560 #endif
2561   }
2562 }
2563 
2564 #if defined(AARCH64) && defined(COMPILER2)
2565 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2566   // FIXME: mov_slow is variable-length
2567   if (!far) return 1; // bl
2568   if (patchable) return 2;  // ldr; blr
2569   return instr_count_for_mov_slow((intptr_t)target) + 1;
2570 }
2571 #endif
2572 
2573 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2574   assert(rspec.type() == relocInfo::static_call_type ||
2575          rspec.type() == relocInfo::none ||
2576          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2577 
2578   // Always generate the relocation information, needed for patching
2579   relocate(rspec); // used by NativeCall::is_call_before()
2580   if (cache_fully_reachable()) {
2581     // Note: this assumes that all possible targets (the initial one
2582     // and the addresses patched to) are all in the code cache.
2583     assert(CodeCache::contains(target), "target might be too far");
2584     bl(target);
2585   } else {
2586 #if defined(AARCH64) && defined(COMPILER2)
2587     if (c2) {
2588       // return address needs to match call_size().
2589       // no need to trash Rtemp
2590       int off = offset();
2591       Label skip_literal;
2592       InlinedAddress address_literal(target);
2593       ldr_literal(LR, address_literal);
2594       blr(LR);
2595       int ret_addr_offset = offset();
2596       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2597       b(skip_literal);
2598       int off2 = offset();
2599       bind_literal(address_literal);
2600       if (offset() - off2 == wordSize) {
2601         // no padding, so insert nop for worst-case sizing
2602         nop();
2603       }
2604       bind(skip_literal);
2605       return ret_addr_offset;
2606     }
2607 #endif
2608     Label ret_addr;
2609     InlinedAddress address_literal(target);
2610 #ifdef AARCH64
2611     ldr_literal(Rtemp, address_literal);
2612     adr(LR, ret_addr);
2613     br(Rtemp);
2614 #else
2615     adr(LR, ret_addr);
2616     ldr_literal(PC, address_literal);
2617 #endif
2618     bind_literal(address_literal);
2619     bind(ret_addr);
2620   }
2621   return offset();
2622 }
2623 
2624 // ((OopHandle)result).resolve();
2625 void MacroAssembler::resolve_oop_handle(Register result) {
2626   // OopHandle::resolve is an indirection.
2627   ldr(result, Address(result, 0));
2628 }
2629 
2630 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2631   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2632   ldr(tmp, Address(method, Method::const_offset()));
2633   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2634   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2635   ldr(mirror, Address(tmp, mirror_offset));
2636   resolve_oop_handle(mirror);
2637 }
2638 
2639 
2640 ///////////////////////////////////////////////////////////////////////////////
2641 
2642 // Compressed pointers
2643 
2644 #ifdef AARCH64
2645 
2646 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2647   if (UseCompressedClassPointers) {
2648     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2649     decode_klass_not_null(dst_klass);
2650   } else {
2651     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2652   }
2653 }
2654 
2655 #else
2656 
2657 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2658   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2659 }
2660 
2661 #endif // AARCH64
2662 
2663 // Blows src_klass.
2664 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2665 #ifdef AARCH64
2666   if (UseCompressedClassPointers) {
2667     assert(src_klass != dst_oop, "not enough registers");
2668     encode_klass_not_null(src_klass);
2669     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2670     return;
2671   }
2672 #endif // AARCH64
2673   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2674 }
2675 
2676 #ifdef AARCH64
2677 
2678 void MacroAssembler::store_klass_gap(Register dst) {
2679   if (UseCompressedClassPointers) {
2680     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2681   }
2682 }
2683 
2684 #endif // AARCH64
2685 
2686 
2687 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2688   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
2689 }
2690 
2691 // Blows src and flags.
2692 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2693   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2694 }
2695 
2696 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2697   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2698 }
2699 
2700 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2701                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2702   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2703   bool as_raw = (decorators & AS_RAW) != 0;
2704   if (as_raw) {
2705     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2706   } else {
2707     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2708   }
2709 }
2710 
2711 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2712                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2713   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2714   bool as_raw = (decorators & AS_RAW) != 0;
2715   if (as_raw) {
2716     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2717   } else {
2718     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2719   }
2720 }
2721 
2722 
2723 #ifdef AARCH64
2724 
2725 // Algorithm must match oop.inline.hpp encode_heap_oop.
2726 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2727   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2728   // Update it at modifications.
2729   assert (UseCompressedOops, "must be compressed");
2730   assert (Universe::heap() != NULL, "java heap should be initialized");
2731 #ifdef ASSERT
2732   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2733 #endif
2734   verify_oop(src);
2735   if (Universe::narrow_oop_base() == NULL) {
2736     if (Universe::narrow_oop_shift() != 0) {
2737       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2738       _lsr(dst, src, Universe::narrow_oop_shift());
2739     } else if (dst != src) {
2740       mov(dst, src);
2741     }
2742   } else {
2743     tst(src, src);
2744     csel(dst, Rheap_base, src, eq);
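         // A NULL src selects Rheap_base here, so the subtraction below yields 0
         // (NULL encodes to 0).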
2745     sub(dst, dst, Rheap_base);
2746     if (Universe::narrow_oop_shift() != 0) {
2747       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2748       _lsr(dst, dst, Universe::narrow_oop_shift());
2749     }
2750   }
2751 }
2752 
2753 // Same algorithm as oop.inline.hpp decode_heap_oop.
2754 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2755 #ifdef ASSERT
2756   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2757 #endif
2758   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2759   if (Universe::narrow_oop_base() != NULL) {
2760     tst(src, src);
2761     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2762     csel(dst, dst, ZR, ne);
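         // When src was NULL the csel keeps ZR, so a zero narrow oop decodes back to NULL.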
2763   } else {
2764     _lsl(dst, src, Universe::narrow_oop_shift());
2765   }
2766   verify_oop(dst);
2767 }
2768 
2769 #ifdef COMPILER2
2770 // Algorithm must match oop.inline.hpp encode_heap_oop.
2771 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2772 // must be changed.
2773 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2774   assert (UseCompressedOops, "must be compressed");
2775   assert (Universe::heap() != NULL, "java heap should be initialized");
2776 #ifdef ASSERT
2777   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2778 #endif
2779   verify_oop(src);
2780   if (Universe::narrow_oop_base() == NULL) {
2781     if (Universe::narrow_oop_shift() != 0) {
2782       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2783       _lsr(dst, src, Universe::narrow_oop_shift());
2784     } else if (dst != src) {
2785           mov(dst, src);
2786     }
2787   } else {
2788     sub(dst, src, Rheap_base);
2789     if (Universe::narrow_oop_shift() != 0) {
2790       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2791       _lsr(dst, dst, Universe::narrow_oop_shift());
2792     }
2793   }
2794 }
2795 
2796 // Same algorithm as oop.inline.hpp decode_heap_oop.
2797 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2798 // must be changed.
2799 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2800 #ifdef ASSERT
2801   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2802 #endif
2803   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2804   if (Universe::narrow_oop_base() != NULL) {
2805     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2806   } else {
2807     _lsl(dst, src, Universe::narrow_oop_shift());
2808   }
2809   verify_oop(dst);
2810 }
2811 
2812 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2813   assert(UseCompressedClassPointers, "should only be used for compressed header");
2814   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2815   int klass_index = oop_recorder()->find_index(k);
2816   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2817 
2818   // Relocation with special format (see relocInfo_arm.hpp).
2819   relocate(rspec);
2820   narrowKlass encoded_k = Klass::encode_klass(k);
2821   movz(dst, encoded_k & 0xffff, 0);
2822   movk(dst, (encoded_k >> 16) & 0xffff, 16);
2823 }
2824 
2825 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2826   assert(UseCompressedOops, "should only be used for compressed header");
2827   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2828   int oop_index = oop_recorder()->find_index(obj);
2829   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2830 
2831   relocate(rspec);
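       // The 0xffff halves are placeholders; the actual narrow oop value is expected
       // to be patched in via the oop relocation (cf. set_narrow_klass above).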
2832   movz(dst, 0xffff, 0);
2833   movk(dst, 0xffff, 16);
2834 }
2835 
2836 #endif // COMPILER2
2837 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2838 // must be changed.
2839 void MacroAssembler::encode_klass_not_null(Register r) {
2840   if (Universe::narrow_klass_base() != NULL) {
2841     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2842     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
2843     mov_slow(Rheap_base, Universe::narrow_klass_base());
2844     sub(r, r, Rheap_base);
2845   }
2846   if (Universe::narrow_klass_shift() != 0) {
2847     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2848     _lsr(r, r, Universe::narrow_klass_shift());
2849   }
2850   if (Universe::narrow_klass_base() != NULL) {
2851     reinit_heapbase();
2852   }
2853 }
2854 
2855 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2856 // must be changed.
2857 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2858   if (dst == src) {
2859     encode_klass_not_null(src);
2860     return;
2861   }
2862   if (Universe::narrow_klass_base() != NULL) {
2863     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
2864     sub(dst, src, dst);
2865     if (Universe::narrow_klass_shift() != 0) {
2866       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2867       _lsr(dst, dst, Universe::narrow_klass_shift());
2868     }
2869   } else {
2870     if (Universe::narrow_klass_shift() != 0) {
2871       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2872       _lsr(dst, src, Universe::narrow_klass_shift());
2873     } else {
2874       mov(dst, src);
2875     }
2876   }
2877 }
2878 
2879 // Function instr_count_for_decode_klass_not_null() counts the instructions
2880 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
2881 // when (Universe::heap() != NULL).  Hence, if the instructions they
2882 // generate change, then this method needs to be updated.
2883 int MacroAssembler::instr_count_for_decode_klass_not_null() {
2884   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
2885   assert(Universe::heap() != NULL, "java heap should be initialized");
2886   if (Universe::narrow_klass_base() != NULL) {
2887     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
2888       1 +                                                                 // add
2889       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
2890   } else {
2891     if (Universe::narrow_klass_shift() != 0) {
2892       return 1;
2893     }
2894   }
2895   return 0;
2896 }
2897 
2898 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2899 // must be changed.
2900 void MacroAssembler::decode_klass_not_null(Register r) {
2901   int off = offset();
2902   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2903   assert(Universe::heap() != NULL, "java heap should be initialized");
2904   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
2905   // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
2906   // Also do not verify_oop as this is called by verify_oop.
2907   if (Universe::narrow_klass_base() != NULL) {
2908     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2909     mov_slow(Rheap_base, Universe::narrow_klass_base());
2910     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
2911     reinit_heapbase();
2912   } else {
2913     if (Universe::narrow_klass_shift() != 0) {
2914       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2915       _lsl(r, r, Universe::narrow_klass_shift());
2916     }
2917   }
2918   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
2919 }
2920 
2921 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2922 // must be changed.
2923 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2924   if (src == dst) {
2925     decode_klass_not_null(src);
2926     return;
2927   }
2928 
2929   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2930   assert(Universe::heap() != NULL, "java heap should be initialized");
2931   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
2932   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
2933   // Also do not verify_oop as this is called by verify_oop.
2934   if (Universe::narrow_klass_base() != NULL) {
2935     mov_slow(dst, Universe::narrow_klass_base());
2936     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
2937   } else {
2938     _lsl(dst, src, Universe::narrow_klass_shift());
2939   }
2940 }
2941 
2942 
2943 void MacroAssembler::reinit_heapbase() {
2944   if (UseCompressedOops || UseCompressedClassPointers) {
2945     if (Universe::heap() != NULL) {
2946       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
2947     } else {
2948       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
2949     }
2950   }
2951 }
2952 
2953 #ifdef ASSERT
2954 void MacroAssembler::verify_heapbase(const char* msg) {
2955   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
2956   // Update it at modifications.
2957   assert (UseCompressedOops, "should be compressed");
2958   assert (Universe::heap() != NULL, "java heap should be initialized");
2959   if (CheckCompressedOops) {
2960     Label ok;
2961     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2962     raw_push(Rtemp, ZR);
2963     mrs(Rtemp, Assembler::SysReg_NZCV);
2964     str(Rtemp, Address(SP, 1 * wordSize));
2965     mov_slow(Rtemp, Universe::narrow_ptrs_base());
2966     cmp(Rheap_base, Rtemp);
2967     b(ok, eq);
2968     stop(msg);
2969     bind(ok);
2970     ldr(Rtemp, Address(SP, 1 * wordSize));
2971     msr(Assembler::SysReg_NZCV, Rtemp);
2972     raw_pop(Rtemp, ZR);
2973     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2974   }
2975 }
2976 #endif // ASSERT
2977 
2978 #endif // AARCH64
2979 
2980 #ifdef COMPILER2
2981 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
2982 {
2983   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
2984 
2985   Register Rmark      = Rscratch2;
2986 
2987   assert(Roop != Rscratch, "");
2988   assert(Roop != Rmark, "");
2989   assert(Rbox != Rscratch, "");
2990   assert(Rbox != Rmark, "");
2991 
2992   Label fast_lock, done;
2993 
2994   if (UseBiasedLocking && !UseOptoBiasInlining) {
2995     Label failed;
2996 #ifdef AARCH64
2997     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
2998 #else
2999     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3000 #endif
3001     bind(failed);
3002   }
3003 
3004   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3005   tst(Rmark, markOopDesc::unlocked_value);
3006   b(fast_lock, ne);
3007 
3008   // Check for recursive lock
3009   // See comments in InterpreterMacroAssembler::lock_object for
3010   // explanations on the fast recursive locking check.
3011 #ifdef AARCH64
3012   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3013   Assembler::LogicalImmediate imm(mask, false);
3014   mov(Rscratch, SP);
3015   sub(Rscratch, Rmark, Rscratch);
3016   ands(Rscratch, Rscratch, imm);
3017   // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
3018   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3019   b(done);
3020 
3021 #else
3022   // -1- test low 2 bits
3023   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3024   // -2- test (hdr - SP) if the low two bits are 0
3025   sub(Rscratch, Rmark, SP, eq);
3026   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3027   // If still 'eq' then recursive locking OK
3028   // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
3029   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3030   b(done);
3031 #endif
3032 
3033   bind(fast_lock);
3034   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3035 
3036   bool allow_fallthrough_on_failure = true;
3037   bool one_shot = true;
3038   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3039 
3040   bind(done);
3041 
3042 }
3043 
3044 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
3045 {
3046   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3047 
3048   Register Rmark      = Rscratch2;
3049 
3050   assert(Roop != Rscratch, "");
3051   assert(Roop != Rmark, "");
3052   assert(Rbox != Rscratch, "");
3053   assert(Rbox != Rmark, "");
3054 
3055   Label done;
3056 
3057   if (UseBiasedLocking && !UseOptoBiasInlining) {
3058     biased_locking_exit(Roop, Rscratch, done);
3059   }
3060 
3061   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3062   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3063   cmp(Rmark, 0);
3064   b(done, eq);
3065 
3066   // Restore the object header
3067   bool allow_fallthrough_on_failure = true;
3068   bool one_shot = true;
3069   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3070 
3071   bind(done);
3072 
3073 }
3074 #endif // COMPILER2