1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "gc/shared/cardTableBarrierSet.hpp"
  36 #include "gc/shared/collectedHeap.inline.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/accessDecorators.hpp"
  40 #include "oops/klass.inline.hpp"
  41 #include "prims/methodHandles.hpp"
  42 #include "runtime/biasedLocking.hpp"
  43 #include "runtime/interfaceSupport.inline.hpp"
  44 #include "runtime/objectMonitor.hpp"
  45 #include "runtime/os.hpp"
  46 #include "runtime/sharedRuntime.hpp"
  47 #include "runtime/stubRoutines.hpp"
  48 #include "utilities/macros.hpp"
  49 
  50 // Implementation of AddressLiteral
  51 
  52 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  53   switch (rtype) {
  54   case relocInfo::oop_type:
  55     // Oops are a special case. Normally they would be their own section
  56     // but in cases like icBuffer they are literals in the code stream that
  57     // we don't have a section for. We use none so that we get a literal address
  58     // which is always patchable.
  59     break;
  60   case relocInfo::external_word_type:
  61     _rspec = external_word_Relocation::spec(_target);
  62     break;
  63   case relocInfo::internal_word_type:
  64     _rspec = internal_word_Relocation::spec(_target);
  65     break;
  66   case relocInfo::opt_virtual_call_type:
  67     _rspec = opt_virtual_call_Relocation::spec();
  68     break;
  69   case relocInfo::static_call_type:
  70     _rspec = static_call_Relocation::spec();
  71     break;
  72   case relocInfo::runtime_call_type:
  73     _rspec = runtime_call_Relocation::spec();
  74     break;
  75   case relocInfo::poll_type:
  76   case relocInfo::poll_return_type:
  77     _rspec = Relocation::spec_simple(rtype);
  78     break;
  79   case relocInfo::none:
  80     break;
  81   default:
  82     ShouldNotReachHere();
  83     break;
  84   }
  85 }
  86 
  87 // Initially added to the Assembler interface as a pure virtual:
  88 //   RegisterConstant delayed_value(..)
  89 // for:
  90 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// It was subsequently modified to its present name and return type.
  92 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  93                                                       Register tmp,
  94                                                       int offset) {
  95   ShouldNotReachHere();
  96   return RegisterOrConstant(-1);
  97 }
  98 
  99 
 100 #ifdef AARCH64
// Note: the ARM32 version is OS-dependent
 102 void MacroAssembler::breakpoint(AsmCondition cond) {
 103   if (cond == al) {
 104     brk();
 105   } else {
 106     Label L;
 107     b(L, inverse(cond));
 108     brk();
 109     bind(L);
 110   }
 111 }
 112 #endif // AARCH64
 113 
 114 
 115 // virtual method calling
 116 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 117                                            Register vtable_index,
 118                                            Register method_result) {
 119   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 120   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
 121   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 122   ldr(method_result, Address(recv_klass, base_offset));
 123 }
 124 
 125 
 126 // Simplified, combined version, good for typical uses.
 127 // Falls through on failure.
 128 void MacroAssembler::check_klass_subtype(Register sub_klass,
 129                                          Register super_klass,
 130                                          Register temp_reg,
 131                                          Register temp_reg2,
 132                                          Register temp_reg3,
 133                                          Label& L_success) {
 134   Label L_failure;
 135   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 136   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 137   bind(L_failure);
}
 139 
 140 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 141                                                    Register super_klass,
 142                                                    Register temp_reg,
 143                                                    Register temp_reg2,
 144                                                    Label* L_success,
 145                                                    Label* L_failure,
 146                                                    Label* L_slow_path) {
 147 
 148   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 149   const Register super_check_offset = temp_reg2;
 150 
 151   Label L_fallthrough;
 152   int label_nulls = 0;
 153   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 154   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 155   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 156   assert(label_nulls <= 1, "at most one NULL in the batch");
 157 
 158   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 159   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 160   Address super_check_offset_addr(super_klass, sco_offset);
 161 
 162   // If the pointers are equal, we are done (e.g., String[] elements).
 163   // This self-check enables sharing of secondary supertype arrays among
 164   // non-primary types such as array-of-interface.  Otherwise, each such
 165   // type would need its own customized SSA.
 166   // We move this check to the front of the fast path because many
 167   // type checks are in fact trivially successful in this manner,
 168   // so we get a nicely predicted branch right at the start of the check.
 169   cmp(sub_klass, super_klass);
 170   b(*L_success, eq);
 171 
 172   // Check the supertype display:
 173   ldr_u32(super_check_offset, super_check_offset_addr);
 174 
 175   Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);
 178 
 179   // This check has worked decisively for primary supers.
 180   // Secondary supers are sought in the super_cache ('super_cache_addr').
 181   // (Secondary supers are interfaces and very deeply nested subtypes.)
 182   // This works in the same check above because of a tricky aliasing
 183   // between the super_cache and the primary super display elements.
 184   // (The 'super_check_addr' can address either, as the case requires.)
 185   // Note that the cache is updated below if it does not help us find
 186   // what we need immediately.
 187   // So if it was a primary super, we can just fail immediately.
 188   // Otherwise, it's the slow path for us (no success at this point).
 189 
 190   b(*L_success, eq);
 191   cmp_32(super_check_offset, sc_offset);
 192   if (L_failure == &L_fallthrough) {
 193     b(*L_slow_path, eq);
 194   } else {
 195     b(*L_failure, ne);
 196     if (L_slow_path != &L_fallthrough) {
 197       b(*L_slow_path);
 198     }
 199   }
 200 
 201   bind(L_fallthrough);
 202 }
 203 
 204 
 205 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 206                                                    Register super_klass,
 207                                                    Register temp_reg,
 208                                                    Register temp2_reg,
 209                                                    Register temp3_reg,
 210                                                    Label* L_success,
 211                                                    Label* L_failure,
 212                                                    bool set_cond_codes) {
 213 #ifdef AARCH64
 214   NOT_IMPLEMENTED();
 215 #else
 216   // Note: if used by code that expects a register to be 0 on success,
 217   // this register must be temp_reg and set_cond_codes must be true
 218 
 219   Register saved_reg = noreg;
 220 
 221   // get additional tmp registers
 222   if (temp3_reg == noreg) {
 223     saved_reg = temp3_reg = LR;
 224     push(saved_reg);
 225   }
 226 
 227   assert(temp2_reg != noreg, "need all the temporary registers");
 228   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 229 
 230   Register cmp_temp = temp_reg;
 231   Register scan_temp = temp3_reg;
 232   Register count_temp = temp2_reg;
 233 
 234   Label L_fallthrough;
 235   int label_nulls = 0;
 236   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 237   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 238   assert(label_nulls <= 1, "at most one NULL in the batch");
 239 
 240   // a couple of useful fields in sub_klass:
 241   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 242   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 243   Address secondary_supers_addr(sub_klass, ss_offset);
 244   Address super_cache_addr(     sub_klass, sc_offset);
 245 
 246 #ifndef PRODUCT
 247   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 248 #endif
 249 
 250   // We will consult the secondary-super array.
 251   ldr(scan_temp, Address(sub_klass, ss_offset));
 252 
  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key is the (uncompressed) super_klass itself
 255   Register search_key = super_klass;
 256 
 257   // Load the array length.
 258   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 259   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 260 
 261   add(count_temp, count_temp, 1);
 262 
 263   Label L_loop, L_setnz_and_fail, L_fail;
 264 
 265   // Top of search loop
 266   bind(L_loop);
 267   // Notes:
 268   //  scan_temp starts at the array elements
 269   //  count_temp is 1+size
 270   subs(count_temp, count_temp, 1);
 271   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 272     // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
 274   } else {
 275     b(L_fail, eq); // not found in the array
 276   }
 277 
 278   // Load next super to check
 279   // In the array of super classes elements are pointer sized.
 280   int element_size = wordSize;
 281   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 282 
 283   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 284   subs(cmp_temp, cmp_temp, search_key);
 285 
 286   // A miss means we are NOT a subtype and need to keep looping
 287   b(L_loop, ne);
 288 
 289   // Falling out the bottom means we found a hit; we ARE a subtype
 290 
 291   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 292 
 293   // Success.  Cache the super we found and proceed in triumph.
 294   str(super_klass, Address(sub_klass, sc_offset));
 295 
 296   if (saved_reg != noreg) {
    // Restore the register saved above before returning success
 298     pop(saved_reg);
 299   }
 300 
 301   b(*L_success);
 302 
 303   bind(L_fail);
 304   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 305   if (set_cond_codes) {
 306     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 307   }
 308   if (saved_reg != noreg) {
 309     pop(saved_reg);
 310   }
 311   if (L_failure != &L_fallthrough) {
 312     b(*L_failure);
 313   }
 314 
 315   bind(L_fallthrough);
 316 #endif
 317 }
 318 
 319 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 320 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 321   assert_different_registers(params_base, params_count);
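  // The receiver (first argument) occupies the highest-addressed parameter slot,
  // i.e. params_base + (params_count - 1) * Interpreter::stackElementSize.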
 322   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 323   return Address(tmp, -Interpreter::stackElementSize);
 324 }
 325 
 326 
 327 void MacroAssembler::align(int modulus) {
 328   while (offset() % modulus != 0) {
 329     nop();
 330   }
 331 }
 332 
 333 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 334                                         Register last_java_fp,
 335                                         bool save_last_java_pc,
 336                                         Register tmp) {
 337   int pc_offset;
 338   if (last_java_fp != noreg) {
 339     // optional
 340     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 341     _fp_saved = true;
 342   } else {
 343     _fp_saved = false;
 344   }
 345   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 346 #ifdef AARCH64
 347     pc_offset = mov_pc_to(tmp);
 348     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 349 #else
 350     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 351     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 352 #endif
 353     _pc_saved = true;
 354   } else {
 355     _pc_saved = false;
 356     pc_offset = -1;
 357   }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that
  // other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but have not added a StoreStore
  // barrier.
 365 
 366   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 367   // to update oop_map offsets) and a StoreStore barrier might be needed.
 368 
 369   if (last_java_sp == noreg) {
 370     last_java_sp = SP; // always saved
 371   }
 372 #ifdef AARCH64
 373   if (last_java_sp == SP) {
 374     mov(tmp, SP);
 375     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 376   } else {
 377     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 378   }
 379 #else
 380   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 381 #endif
 382 
 383   return pc_offset; // for oopmaps
 384 }
 385 
 386 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 387   const Register Rzero = zero_register(tmp);
 388   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 389   if (_fp_saved) {
 390     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 391   }
 392   if (_pc_saved) {
 393     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 394   }
 395 }
 396 
 397 
 398 // Implementation of call_VM versions
 399 
 400 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 401   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 402   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 403 
 404 #ifndef AARCH64
 405   // Safer to save R9 here since callers may have been written
 406   // assuming R9 survives. This is suboptimal but is not worth
 407   // optimizing for the few platforms where R9 is scratched.
 408   push(RegisterSet(R4) | R9ifScratched);
 409   mov(R4, SP);
 410   bic(SP, SP, StackAlignmentInBytes - 1);
 411 #endif // AARCH64
 412   call(entry_point, relocInfo::runtime_call_type);
 413 #ifndef AARCH64
 414   mov(SP, R4);
 415   pop(RegisterSet(R4) | R9ifScratched);
 416 #endif // AARCH64
 417 }
 418 
 419 
 420 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 421   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 422   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 423 
 424   const Register tmp = Rtemp;
 425   assert_different_registers(oop_result, tmp);
 426 
 427   set_last_Java_frame(SP, FP, true, tmp);
 428 
 429 #ifdef ASSERT
 430   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 431 #endif // ASSERT
 432 
 433 #ifndef AARCH64
 434 #if R9_IS_SCRATCHED
 435   // Safer to save R9 here since callers may have been written
 436   // assuming R9 survives. This is suboptimal but is not worth
 437   // optimizing for the few platforms where R9 is scratched.
 438 
  // Note: R9 cannot be saved above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once SP is saved (with set_last_Java_frame), decrease SP before rounding
  // to ensure the slot at SP will be free for R9.
 443   sub(SP, SP, 4);
 444   bic(SP, SP, StackAlignmentInBytes - 1);
 445   str(R9, Address(SP, 0));
 446 #else
 447   bic(SP, SP, StackAlignmentInBytes - 1);
 448 #endif // R9_IS_SCRATCHED
 449 #endif
 450 
 451   mov(R0, Rthread);
 452   call(entry_point, relocInfo::runtime_call_type);
 453 
 454 #ifndef AARCH64
 455 #if R9_IS_SCRATCHED
 456   ldr(R9, Address(SP, 0));
 457 #endif
 458   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 459 #endif
 460 
 461   reset_last_Java_frame(tmp);
 462 
 463   // C++ interp handles this in the interpreter
 464   check_and_handle_popframe();
 465   check_and_handle_earlyret();
 466 
 467   if (check_exceptions) {
 468     // check for pending exceptions
 469     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 470 #ifdef AARCH64
 471     Label L;
 472     cbz(tmp, L);
 473     mov_pc_to(Rexception_pc);
 474     b(StubRoutines::forward_exception_entry());
 475     bind(L);
 476 #else
 477     cmp(tmp, 0);
 478     mov(Rexception_pc, PC, ne);
 479     b(StubRoutines::forward_exception_entry(), ne);
 480 #endif // AARCH64
 481   }
 482 
 483   // get oop result if there is one and reset the value in the thread
 484   if (oop_result->is_valid()) {
 485     get_vm_result(oop_result, tmp);
 486   }
 487 }
 488 
 489 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 490   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 491 }
 492 
 493 
 494 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 495   assert (arg_1 == R1, "fixed register for arg_1");
 496   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 497 }
 498 
 499 
 500 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 501   assert (arg_1 == R1, "fixed register for arg_1");
 502   assert (arg_2 == R2, "fixed register for arg_2");
 503   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 504 }
 505 
 506 
 507 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 508   assert (arg_1 == R1, "fixed register for arg_1");
 509   assert (arg_2 == R2, "fixed register for arg_2");
 510   assert (arg_3 == R3, "fixed register for arg_3");
 511   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 512 }
 513 
 514 
 515 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 516   // Not used on ARM
 517   Unimplemented();
 518 }
 519 
 520 
 521 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 522   // Not used on ARM
 523   Unimplemented();
 524 }
 525 
 526 
 527 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
 529   Unimplemented();
 530 }
 531 
 532 
 533 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 534   // Not used on ARM
 535   Unimplemented();
 536 }
 537 
 538 // Raw call, without saving/restoring registers, exception handling, etc.
 539 // Mainly used from various stubs.
 540 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 541   const Register tmp = Rtemp; // Rtemp free since scratched by call
 542   set_last_Java_frame(SP, FP, true, tmp);
 543 #if R9_IS_SCRATCHED
 544   if (save_R9_if_scratched) {
 545     // Note: Saving also R10 for alignment.
 546     push(RegisterSet(R9, R10));
 547   }
 548 #endif
 549   mov(R0, Rthread);
 550   call(entry_point, relocInfo::runtime_call_type);
 551 #if R9_IS_SCRATCHED
 552   if (save_R9_if_scratched) {
 553     pop(RegisterSet(R9, R10));
 554   }
 555 #endif
 556   reset_last_Java_frame(tmp);
 557 }
 558 
 559 void MacroAssembler::call_VM_leaf(address entry_point) {
 560   call_VM_leaf_helper(entry_point, 0);
 561 }
 562 
 563 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 564   assert (arg_1 == R0, "fixed register for arg_1");
 565   call_VM_leaf_helper(entry_point, 1);
 566 }
 567 
 568 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 569   assert (arg_1 == R0, "fixed register for arg_1");
 570   assert (arg_2 == R1, "fixed register for arg_2");
 571   call_VM_leaf_helper(entry_point, 2);
 572 }
 573 
 574 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 575   assert (arg_1 == R0, "fixed register for arg_1");
 576   assert (arg_2 == R1, "fixed register for arg_2");
 577   assert (arg_3 == R2, "fixed register for arg_3");
 578   call_VM_leaf_helper(entry_point, 3);
 579 }
 580 
 581 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 582   assert (arg_1 == R0, "fixed register for arg_1");
 583   assert (arg_2 == R1, "fixed register for arg_2");
 584   assert (arg_3 == R2, "fixed register for arg_3");
 585   assert (arg_4 == R3, "fixed register for arg_4");
 586   call_VM_leaf_helper(entry_point, 4);
 587 }
 588 
 589 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 590   assert_different_registers(oop_result, tmp);
 591   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 592   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 593   verify_oop(oop_result);
 594 }
 595 
 596 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 597   assert_different_registers(metadata_result, tmp);
 598   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 599   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 600 }
 601 
 602 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 603   if (arg2.is_register()) {
 604     add(dst, arg1, arg2.as_register());
 605   } else {
 606     add(dst, arg1, arg2.as_constant());
 607   }
 608 }
 609 
 610 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 611 #ifdef AARCH64
 612   if (c == 0) {
 613     if (rd != rn) {
 614       mov(rd, rn);
 615     }
 616     return;
 617   }
 618   if (c < 0) {
 619     sub_slow(rd, rn, -c);
 620     return;
 621   }
 622   if (c > right_n_bits(24)) {
 623     guarantee(rd != rn, "no large add_slow with only one register");
 624     mov_slow(rd, c);
 625     add(rd, rn, rd);
 626   } else {
 627     int lo = c & right_n_bits(12);
 628     int hi = (c >> 12) & right_n_bits(12);
 629     if (lo != 0) {
 630       add(rd, rn, lo, lsl0);
 631     }
 632     if (hi != 0) {
 633       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 634     }
 635   }
 636 #else
  // This function is used by the compiler for handling large frame offsets
 638   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 639     return sub(rd, rn, (-c));
 640   }
 641   int low = c & 0x3fc;
 642   if (low != 0) {
 643     add(rd, rn, low);
 644     rn = rd;
 645   }
 646   if (c & ~0x3fc) {
 647     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 648     add(rd, rn, c & ~0x3fc);
 649   } else if (rd != rn) {
 650     assert(c == 0, "");
 651     mov(rd, rn); // need to generate at least one move!
 652   }
 653 #endif // AARCH64
 654 }
 655 
 656 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 657 #ifdef AARCH64
 658   if (c <= 0) {
 659     add_slow(rd, rn, -c);
 660     return;
 661   }
 662   if (c > right_n_bits(24)) {
 663     guarantee(rd != rn, "no large sub_slow with only one register");
 664     mov_slow(rd, c);
 665     sub(rd, rn, rd);
 666   } else {
 667     int lo = c & right_n_bits(12);
 668     int hi = (c >> 12) & right_n_bits(12);
 669     if (lo != 0) {
 670       sub(rd, rn, lo, lsl0);
 671     }
 672     if (hi != 0) {
 673       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 674     }
 675   }
 676 #else
  // This function is used by the compiler for handling large frame offsets
 678   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 679     return add(rd, rn, (-c));
 680   }
 681   int low = c & 0x3fc;
 682   if (low != 0) {
 683     sub(rd, rn, low);
 684     rn = rd;
 685   }
 686   if (c & ~0x3fc) {
 687     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 688     sub(rd, rn, c & ~0x3fc);
 689   } else if (rd != rn) {
 690     assert(c == 0, "");
 691     mov(rd, rn); // need to generate at least one move!
 692   }
 693 #endif // AARCH64
 694 }
 695 
 696 void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non-relocated mov_related_address
 698   mov_slow(rd, (intptr_t)addr);
 699 }
 700 
 701 void MacroAssembler::mov_slow(Register rd, const char *str) {
 702   mov_slow(rd, (intptr_t)str);
 703 }
 704 
 705 #ifdef AARCH64
 706 
 707 // Common code for mov_slow and instr_count_for_mov_slow.
 708 // Returns number of instructions of mov_slow pattern,
 709 // generating it if non-null MacroAssembler is given.
 710 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when making modifications.
 713 
 714   const intx mask = right_n_bits(16);
 715   // 1 movz instruction
 716   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 717     if ((c & ~(mask << base_shift)) == 0) {
 718       if (masm != NULL) {
 719         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 720       }
 721       return 1;
 722     }
 723   }
 724   // 1 movn instruction
 725   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 726     if (((~c) & ~(mask << base_shift)) == 0) {
 727       if (masm != NULL) {
 728         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 729       }
 730       return 1;
 731     }
 732   }
 733   // 1 orr instruction
 734   {
 735     LogicalImmediate imm(c, false);
 736     if (imm.is_encoded()) {
 737       if (masm != NULL) {
 738         masm->orr(rd, ZR, imm);
 739       }
 740       return 1;
 741     }
 742   }
 743   // 1 movz/movn + up to 3 movk instructions
 744   int zeroes = 0;
 745   int ones = 0;
 746   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 747     int part = (c >> base_shift) & mask;
 748     if (part == 0) {
 749       ++zeroes;
 750     } else if (part == mask) {
 751       ++ones;
 752     }
 753   }
 754   int def_bits = 0;
 755   if (ones > zeroes) {
 756     def_bits = mask;
 757   }
 758   int inst_count = 0;
 759   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 760     int part = (c >> base_shift) & mask;
 761     if (part != def_bits) {
 762       if (masm != NULL) {
 763         if (inst_count > 0) {
 764           masm->movk(rd, part, base_shift);
 765         } else {
 766           if (def_bits == 0) {
 767             masm->movz(rd, part, base_shift);
 768           } else {
 769             masm->movn(rd, ~part & mask, base_shift);
 770           }
 771         }
 772       }
 773       inst_count++;
 774     }
 775   }
 776   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 777   return inst_count;
 778 }
 779 
 780 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 781 #ifdef ASSERT
 782   int off = offset();
 783 #endif
 784   (void) mov_slow_helper(rd, c, this);
 785   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 786 }
 787 
 788 // Counts instructions generated by mov_slow(rd, c).
 789 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 790   return mov_slow_helper(noreg, c, NULL);
 791 }
 792 
 793 int MacroAssembler::instr_count_for_mov_slow(address c) {
 794   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 795 }
 796 
 797 #else
 798 
 799 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 800   if (AsmOperand::is_rotated_imm(c)) {
 801     mov(rd, c, cond);
 802   } else if (AsmOperand::is_rotated_imm(~c)) {
 803     mvn(rd, ~c, cond);
 804   } else if (VM_Version::supports_movw()) {
 805     movw(rd, c & 0xffff, cond);
 806     if ((unsigned int)c >> 16) {
 807       movt(rd, (unsigned int)c >> 16, cond);
 808     }
 809   } else {
 810     // Find first non-zero bit
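    // (advance by 2 so the 8-bit window at 'shift' stays encodable as an ARM
    // rotated immediate, since immediate rotations are restricted to even amounts)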
 811     int shift = 0;
 812     while ((c & (3 << shift)) == 0) {
 813       shift += 2;
 814     }
 815     // Put the least significant part of the constant
 816     int mask = 0xff << shift;
 817     mov(rd, c & mask, cond);
 818     // Add up to 3 other parts of the constant;
 819     // each of them can be represented as rotated_imm
 820     if (c & (mask << 8)) {
 821       orr(rd, rd, c & (mask << 8), cond);
 822     }
 823     if (c & (mask << 16)) {
 824       orr(rd, rd, c & (mask << 16), cond);
 825     }
 826     if (c & (mask << 24)) {
 827       orr(rd, rd, c & (mask << 24), cond);
 828     }
 829   }
 830 }
 831 
 832 #endif // AARCH64
 833 
 834 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 835 #ifdef AARCH64
 836                              bool patchable
 837 #else
 838                              AsmCondition cond
 839 #endif
 840                              ) {
 841 
 842   if (o == NULL) {
 843 #ifdef AARCH64
 844     if (patchable) {
 845       nop();
 846     }
 847     mov(rd, ZR);
 848 #else
 849     mov(rd, 0, cond);
 850 #endif
 851     return;
 852   }
 853 
 854   if (oop_index == 0) {
 855     oop_index = oop_recorder()->allocate_oop_index(o);
 856   }
 857   relocate(oop_Relocation::spec(oop_index));
 858 
 859 #ifdef AARCH64
 860   if (patchable) {
 861     nop();
 862   }
 863   ldr(rd, pc());
 864 #else
 865   if (VM_Version::supports_movw()) {
 866     movw(rd, 0, cond);
 867     movt(rd, 0, cond);
 868   } else {
 869     ldr(rd, Address(PC), cond);
 870     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 871     nop();
 872   }
 873 #endif
 874 }
 875 
 876 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 877   if (o == NULL) {
 878 #ifdef AARCH64
 879     if (patchable) {
 880       nop();
 881     }
 882 #endif
 883     mov(rd, 0);
 884     return;
 885   }
 886 
 887   if (metadata_index == 0) {
 888     metadata_index = oop_recorder()->allocate_metadata_index(o);
 889   }
 890   relocate(metadata_Relocation::spec(metadata_index));
 891 
 892 #ifdef AARCH64
 893   if (patchable) {
 894     nop();
 895   }
 896 #ifdef COMPILER2
 897   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 898     mov_slow(rd, (address)o);
 899     return;
 900   }
 901 #endif
 902   ldr(rd, pc());
 903 #else
 904   if (VM_Version::supports_movw()) {
 905     movw(rd, ((int)o) & 0xffff);
 906     movt(rd, (unsigned int)o >> 16);
 907   } else {
 908     ldr(rd, Address(PC));
 909     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 910     nop();
 911   }
 912 #endif // AARCH64
 913 }
 914 
 915 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 916   Label skip_constant;
 917   union {
 918     jfloat f;
 919     jint i;
 920   } accessor;
 921   accessor.f = c;
 922 
 923 #ifdef AARCH64
 924   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 925   Label L;
 926   ldr_s(fd, target(L));
 927   b(skip_constant);
 928   bind(L);
 929   emit_int32(accessor.i);
 930   bind(skip_constant);
 931 #else
 932   flds(fd, Address(PC), cond);
 933   b(skip_constant);
 934   emit_int32(accessor.i);
 935   bind(skip_constant);
 936 #endif // AARCH64
 937 }
 938 
 939 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 940   Label skip_constant;
 941   union {
 942     jdouble d;
 943     jint i[2];
 944   } accessor;
 945   accessor.d = c;
 946 
 947 #ifdef AARCH64
 948   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 949   Label L;
 950   ldr_d(fd, target(L));
 951   b(skip_constant);
 952   align(wordSize);
 953   bind(L);
 954   emit_int32(accessor.i[0]);
 955   emit_int32(accessor.i[1]);
 956   bind(skip_constant);
 957 #else
 958   fldd(fd, Address(PC), cond);
 959   b(skip_constant);
 960   emit_int32(accessor.i[0]);
 961   emit_int32(accessor.i[1]);
 962   bind(skip_constant);
 963 #endif // AARCH64
 964 }
 965 
 966 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 967   intptr_t addr = (intptr_t) address_of_global;
 968 #ifdef AARCH64
 969   assert((addr & 0x3) == 0, "address should be aligned");
 970 
 971   // FIXME: TODO
 972   if (false && page_reachable_from_cache(address_of_global)) {
 973     assert(false,"TODO: relocate");
 974     //relocate();
 975     adrp(reg, address_of_global);
 976     ldrsw(reg, Address(reg, addr & 0xfff));
 977   } else {
 978     mov_slow(reg, addr & ~0x3fff);
 979     ldrsw(reg, Address(reg, addr & 0x3fff));
 980   }
 981 #else
 982   mov_slow(reg, addr & ~0xfff);
 983   ldr(reg, Address(reg, addr & 0xfff));
 984 #endif
 985 }
 986 
 987 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 988 #ifdef AARCH64
 989   intptr_t addr = (intptr_t) address_of_global;
 990   assert ((addr & 0x7) == 0, "address should be aligned");
 991   mov_slow(reg, addr & ~0x7fff);
 992   ldr(reg, Address(reg, addr & 0x7fff));
 993 #else
 994   ldr_global_s32(reg, address_of_global);
 995 #endif
 996 }
 997 
 998 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
 999   intptr_t addr = (intptr_t) address_of_global;
1000   mov_slow(reg, addr & ~0xfff);
1001   ldrb(reg, Address(reg, addr & 0xfff));
1002 }
1003 
1004 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1005 #ifdef AARCH64
1006   switch (bits) {
1007     case  8: uxtb(rd, rn); break;
1008     case 16: uxth(rd, rn); break;
1009     case 32: mov_w(rd, rn); break;
1010     default: ShouldNotReachHere();
1011   }
1012 #else
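  // Pick an encoding that fits ARM immediates: (1 << bits) - 1 is a valid rotated
  // immediate only for bits <= 8, its complement (-1 << bits) only for bits >= 24;
  // otherwise clear the upper bits with a shift-left/shift-right pair.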
1013   if (bits <= 8) {
1014     andr(rd, rn, (1 << bits) - 1);
1015   } else if (bits >= 24) {
1016     bic(rd, rn, -1 << bits);
1017   } else {
1018     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1019     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1020   }
1021 #endif
1022 }
1023 
1024 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1025 #ifdef AARCH64
1026   switch (bits) {
1027     case  8: sxtb(rd, rn); break;
1028     case 16: sxth(rd, rn); break;
1029     case 32: sxtw(rd, rn); break;
1030     default: ShouldNotReachHere();
1031   }
1032 #else
1033   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1034   mov(rd, AsmOperand(rd, asr, 32 - bits));
1035 #endif
1036 }
1037 
1038 #ifndef AARCH64
1039 
1040 void MacroAssembler::cmpoop(Register obj1, Register obj2) {
1041   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1042   bs->obj_equals(this, obj1, obj2);
1043 }
1044 
1045 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1046                                Register rn_lo, Register rn_hi,
1047                                AsmCondition cond) {
1048   if (rd_lo != rn_hi) {
1049     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1050     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1051   } else if (rd_hi != rn_lo) {
1052     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1053     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1054   } else {
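    // Full overlap (rd_lo == rn_hi and rd_hi == rn_lo): swap the two halves
    // with the three-EOR trick, which needs no scratch register.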
1055     eor(rd_lo, rd_hi, rd_lo, cond);
1056     eor(rd_hi, rd_lo, rd_hi, cond);
1057     eor(rd_lo, rd_hi, rd_lo, cond);
1058   }
1059 }
1060 
1061 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1062                                 Register rn_lo, Register rn_hi,
1063                                 AsmShift shift, Register count) {
1064   Register tmp;
1065   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1066     tmp = rd_lo;
1067   } else {
1068     tmp = rd_hi;
1069   }
1070   assert_different_registers(tmp, count, rn_lo, rn_hi);
1071 
1072   subs(tmp, count, 32);
1073   if (shift == lsl) {
1074     assert_different_registers(rd_hi, rn_lo);
1075     assert_different_registers(count, rd_hi);
1076     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1077     rsb(tmp, count, 32, mi);
1078     if (rd_hi == rn_hi) {
1079       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1080       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1081     } else {
1082       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1083       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1084     }
1085     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1086   } else {
1087     assert_different_registers(rd_lo, rn_hi);
1088     assert_different_registers(rd_lo, count);
1089     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1090     rsb(tmp, count, 32, mi);
1091     if (rd_lo == rn_lo) {
1092       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1093       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1094     } else {
1095       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1096       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1097     }
1098     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1099   }
1100 }
1101 
1102 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1103                                 Register rn_lo, Register rn_hi,
1104                                 AsmShift shift, int count) {
1105   assert(count != 0 && (count & ~63) == 0, "must be");
1106 
1107   if (shift == lsl) {
1108     assert_different_registers(rd_hi, rn_lo);
1109     if (count >= 32) {
1110       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1111       mov(rd_lo, 0);
1112     } else {
1113       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1114       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1115       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1116     }
1117   } else {
1118     assert_different_registers(rd_lo, rn_hi);
1119     if (count >= 32) {
1120       if (count == 32) {
1121         mov(rd_lo, rn_hi);
1122       } else {
1123         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1124       }
1125       if (shift == asr) {
1126         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1127       } else {
1128         mov(rd_hi, 0);
1129       }
1130     } else {
1131       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1132       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1133       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1134     }
1135   }
1136 }
1137 #endif // !AARCH64
1138 
1139 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when making modifications.
1142   if (!VerifyOops) return;
1143 
1144   char buffer[64];
1145 #ifdef COMPILER1
1146   if (CommentedAssembly) {
1147     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1148     block_comment(buffer);
1149   }
1150 #endif
1151   const char* msg_buffer = NULL;
1152   {
1153     ResourceMark rm;
1154     stringStream ss;
1155     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1156     msg_buffer = code_string(ss.as_string());
1157   }
1158 
1159   save_all_registers();
1160 
1161   if (reg != R2) {
1162       mov(R2, reg);                              // oop to verify
1163   }
1164   mov(R1, SP);                                   // register save area
1165 
1166   Label done;
1167   InlinedString Lmsg(msg_buffer);
1168   ldr_literal(R0, Lmsg);                         // message
1169 
1170   // call indirectly to solve generation ordering problem
1171   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1172   call(Rtemp);
1173 
1174   restore_all_registers();
1175 
1176   b(done);
1177 #ifdef COMPILER2
1178   int off = offset();
1179 #endif
1180   bind_literal(Lmsg);
1181 #ifdef COMPILER2
1182   if (offset() - off == 1 * wordSize) {
1183     // no padding, so insert nop for worst-case sizing
1184     nop();
1185   }
1186 #endif
1187   bind(done);
1188 }
1189 
1190 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1191   if (!VerifyOops) return;
1192 
1193   const char* msg_buffer = NULL;
1194   {
1195     ResourceMark rm;
1196     stringStream ss;
1197     if ((addr.base() == SP) && (addr.index()==noreg)) {
1198       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1199     } else {
1200       ss.print("verify_oop_addr: %s", s);
1201     }
1202     ss.print(" (%s:%d)", file, line);
1203     msg_buffer = code_string(ss.as_string());
1204   }
1205 
1206   int push_size = save_all_registers();
1207 
1208   if (addr.base() == SP) {
1209     // computes an addr that takes into account the push
1210     if (addr.index() != noreg) {
1211       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1212       add(new_base, SP, push_size);
1213       addr = addr.rebase(new_base);
1214     } else {
1215       addr = addr.plus_disp(push_size);
1216     }
1217   }
1218 
1219   ldr(R2, addr);                                 // oop to verify
1220   mov(R1, SP);                                   // register save area
1221 
1222   Label done;
1223   InlinedString Lmsg(msg_buffer);
1224   ldr_literal(R0, Lmsg);                         // message
1225 
1226   // call indirectly to solve generation ordering problem
1227   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1228   call(Rtemp);
1229 
1230   restore_all_registers();
1231 
1232   b(done);
1233   bind_literal(Lmsg);
1234   bind(done);
1235 }
1236 
1237 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1238   if (needs_explicit_null_check(offset)) {
1239 #ifdef AARCH64
1240     ldr(ZR, Address(reg));
1241 #else
1242     assert_different_registers(reg, tmp);
1243     if (tmp == noreg) {
1244       tmp = Rtemp;
1245       assert((! Thread::current()->is_Compiler_thread()) ||
1246              (! (ciEnv::current()->task() == NULL)) ||
1247              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1248              "Rtemp not available in C2"); // explicit tmp register required
1249       // XXX: could we mark the code buffer as not compatible with C2 ?
1250     }
1251     ldr(tmp, Address(reg));
1252 #endif
1253   }
1254 }
1255 
1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1258                                  RegisterOrConstant size_expression, Label& slow_case) {
1259   if (!Universe::heap()->supports_inline_contig_alloc()) {
1260     b(slow_case);
1261     return;
1262   }
1263 
1264   CollectedHeap* ch = Universe::heap();
1265 
1266   const Register top_addr = tmp1;
1267   const Register heap_end = tmp2;
1268 
1269   if (size_expression.is_register()) {
1270     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1271   } else {
1272     assert_different_registers(obj, obj_end, top_addr, heap_end);
1273   }
1274 
  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
1276   if (load_const) {
1277     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1278   } else {
1279     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1280   }
1281   // Calculate new heap_top by adding the size of the object
1282   Label retry;
1283   bind(retry);
1284 
1285 #ifdef AARCH64
1286   ldxr(obj, top_addr);
1287 #else
1288   ldr(obj, Address(top_addr));
1289 #endif // AARCH64
1290 
1291   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1292   add_rc(obj_end, obj, size_expression);
1293   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1294   cmp(obj_end, obj);
1295   b(slow_case, lo);
1296   // Update heap_top if allocation succeeded
1297   cmp(obj_end, heap_end);
1298   b(slow_case, hi);
1299 
1300 #ifdef AARCH64
1301   stxr(heap_end/*scratched*/, obj_end, top_addr);
1302   cbnz_w(heap_end, retry);
1303 #else
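  // Publish the new top with a CAS; on contention, retry the whole
  // load/compute/check sequence.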
1304   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1305   b(retry, ne);
1306 #endif // AARCH64
1307 }
1308 
1309 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1310 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1311                                  RegisterOrConstant size_expression, Label& slow_case) {
1312   const Register tlab_end = tmp1;
1313   assert_different_registers(obj, obj_end, tlab_end);
1314 
1315   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1316   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1317   add_rc(obj_end, obj, size_expression);
1318   cmp(obj_end, tlab_end);
1319   b(slow_case, hi);
1320   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1321 }
1322 
// Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
1324 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1325   Label loop;
1326   const Register ptr = start;
1327 
1328 #ifdef AARCH64
1329   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1330   const Register size = tmp;
1331   Label remaining, done;
1332 
1333   sub(size, end, start);
1334 
1335 #ifdef ASSERT
1336   { Label L;
1337     tst(size, wordSize - 1);
1338     b(L, eq);
1339     stop("size is not a multiple of wordSize");
1340     bind(L);
1341   }
1342 #endif // ASSERT
1343 
1344   subs(size, size, wordSize);
1345   b(remaining, le);
1346 
1347   // Zero by 2 words per iteration.
1348   bind(loop);
1349   subs(size, size, 2*wordSize);
1350   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1351   b(loop, gt);
1352 
1353   bind(remaining);
1354   b(done, ne);
1355   str(ZR, Address(ptr));
1356   bind(done);
1357 #else
1358   mov(tmp, 0);
1359   bind(loop);
1360   cmp(ptr, end);
1361   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1362   b(loop, lo);
1363 #endif // AARCH64
1364 }
1365 
1366 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1367 #ifdef AARCH64
1368   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1369   add_rc(tmp, tmp, size_in_bytes);
1370   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1371 #else
1372   // Bump total bytes allocated by this thread
1373   Label done;
1374 
1375   // Borrow the Rthread for alloc counter
1376   Register Ralloc = Rthread;
1377   add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1378   ldr(tmp, Address(Ralloc));
1379   adds(tmp, tmp, size_in_bytes);
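  // 'cc' = carry clear: the low word did not overflow, so a 32-bit store suffices.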
1380   str(tmp, Address(Ralloc), cc);
1381   b(done, cc);
1382 
  // Increment the high word and store single-copy atomically (an unlikely scenario on typical embedded systems, as it means >4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a requirement could be difficult to satisfy by
  // allocating those registers at a higher level, therefore the routine is ready to allocate a pair itself.
1386   Register low, high;
  // Select either R0/R1 or R2/R3
1388 
1389   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1390     low = R2;
1391     high  = R3;
1392   } else {
1393     low = R0;
1394     high  = R1;
1395   }
1396   push(RegisterSet(low, high));
1397 
1398   ldrd(low, Address(Ralloc));
1399   adds(low, low, size_in_bytes);
1400   adc(high, high, 0);
1401   strd(low, Address(Ralloc));
1402 
1403   pop(RegisterSet(low, high));
1404 
1405   bind(done);
1406 
1407   // Unborrow the Rthread
1408   sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1409 #endif // AARCH64
1410 }
1411 
1412 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1413   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1414   if (UseStackBanging) {
1415     const int page_size = os::vm_page_size();
1416 
1417     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1418     strb(R0, Address(tmp));
1419 #ifdef AARCH64
1420     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1421       sub(tmp, tmp, page_size);
1422       strb(R0, Address(tmp));
1423     }
1424 #else
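    // Step by 0xff0 rather than a full page: 0x1000 does not fit the 12-bit
    // pre-indexed offset, and a sub-page step still touches every page.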
1425     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1426       strb(R0, Address(tmp, -0xff0, pre_indexed));
1427     }
1428 #endif // AARCH64
1429   }
1430 }
1431 
1432 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1433   if (UseStackBanging) {
1434     Label loop;
1435 
1436     mov(tmp, SP);
1437     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1438 #ifdef AARCH64
1439     sub(tmp, tmp, Rsize);
1440     bind(loop);
1441     subs(Rsize, Rsize, os::vm_page_size());
1442     strb(ZR, Address(tmp, Rsize));
1443 #else
1444     bind(loop);
1445     subs(Rsize, Rsize, 0xff0);
1446     strb(R0, Address(tmp, -0xff0, pre_indexed));
1447 #endif // AARCH64
1448     b(loop, hi);
1449   }
1450 }
1451 
1452 void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when making modifications.
1455 #ifdef COMPILER1
1456   if (CommentedAssembly) {
1457     block_comment("stop");
1458   }
1459 #endif
1460 
1461   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1462   InlinedString Lmsg(msg);
1463 
1464   // save all registers for further inspection
1465   save_all_registers();
1466 
1467   ldr_literal(R0, Lmsg);                     // message
1468   mov(R1, SP);                               // register save area
1469 
1470 #ifdef AARCH64
1471   ldr_literal(Rtemp, Ldebug);
1472   br(Rtemp);
1473 #else
1474   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1475 #endif // AARCH64
1476 
1477 #if defined(COMPILER2) && defined(AARCH64)
1478   int off = offset();
1479 #endif
1480   bind_literal(Lmsg);
1481   bind_literal(Ldebug);
1482 #if defined(COMPILER2) && defined(AARCH64)
1483   if (offset() - off == 2 * wordSize) {
1484     // no padding, so insert nop for worst-case sizing
1485     nop();
1486   }
1487 #endif
1488 }
1489 
1490 void MacroAssembler::warn(const char* msg) {
1491 #ifdef COMPILER1
1492   if (CommentedAssembly) {
1493     block_comment("warn");
1494   }
1495 #endif
1496 
1497   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1498   InlinedString Lmsg(msg);
1499   Label done;
1500 
1501   int push_size = save_caller_save_registers();
1502 
1503 #ifdef AARCH64
1504   // TODO-AARCH64 - get rid of extra debug parameters
1505   mov(R1, LR);
1506   mov(R2, FP);
1507   add(R3, SP, push_size);
1508 #endif
1509 
1510   ldr_literal(R0, Lmsg);                    // message
1511   ldr_literal(LR, Lwarn);                   // call warning
1512 
1513   call(LR);
1514 
1515   restore_caller_save_registers();
1516 
1517   b(done);
1518   bind_literal(Lmsg);
1519   bind_literal(Lwarn);
1520   bind(done);
1521 }
1522 
1523 
1524 int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when making modifications.
1527 #ifdef AARCH64
1528   const Register tmp = Rtemp;
1529   raw_push(R30, ZR);
1530   for (int i = 28; i >= 0; i -= 2) {
1531       raw_push(as_Register(i), as_Register(i+1));
1532   }
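  // Overwrite the ZR slot (index 31) with the current PC, then reload tmp from
  // its own save slot so Rtemp still holds its original value afterwards.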
1533   mov_pc_to(tmp);
1534   str(tmp, Address(SP, 31*wordSize));
1535   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1536   return 32*wordSize;
1537 #else
1538   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1539   return 15*wordSize;
1540 #endif // AARCH64
1541 }
1542 
1543 void MacroAssembler::restore_all_registers() {
1544 #ifdef AARCH64
1545   for (int i = 0; i <= 28; i += 2) {
1546     raw_pop(as_Register(i), as_Register(i+1));
1547   }
1548   raw_pop(R30, ZR);
1549 #else
1550   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1551   add(SP, SP, wordSize);                         // discard saved PC
1552 #endif // AARCH64
1553 }
1554 
1555 int MacroAssembler::save_caller_save_registers() {
1556 #ifdef AARCH64
1557   for (int i = 0; i <= 16; i += 2) {
1558     raw_push(as_Register(i), as_Register(i+1));
1559   }
1560   raw_push(R18, LR);
1561   return 20*wordSize;
1562 #else
1563 #if R9_IS_SCRATCHED
1564   // Save also R10 to preserve alignment
1565   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1566   return 8*wordSize;
1567 #else
1568   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1569   return 6*wordSize;
1570 #endif
1571 #endif // AARCH64
1572 }
1573 
1574 void MacroAssembler::restore_caller_save_registers() {
1575 #ifdef AARCH64
1576   raw_pop(R18, LR);
1577   for (int i = 16; i >= 0; i -= 2) {
1578     raw_pop(as_Register(i), as_Register(i+1));
1579   }
1580 #else
1581 #if R9_IS_SCRATCHED
1582   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1583 #else
1584   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1585 #endif
1586 #endif // AARCH64
1587 }
1588 
1589 void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
1591   JavaThread* thread = JavaThread::current();
1592   thread->set_thread_state(_thread_in_vm);
1593 
1594   if (ShowMessageBoxOnError) {
1595     ttyLocker ttyl;
1596     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1597       BytecodeCounter::print();
1598     }
1599     if (os::message_box(msg, "Execution stopped, print registers?")) {
1600 #ifdef AARCH64
1601       // saved registers: R0-R30, PC
1602       const int nregs = 32;
1603 #else
1604       // saved registers: R0-R12, LR, PC
1605       const int nregs = 15;
1606       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1607 #endif // AARCH64
1608 
1609       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1610         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1611       }
1612 
1613 #ifdef AARCH64
1614       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1615 #endif // AARCH64
1616 
1617       // derive original SP value from the address of register save area
1618       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1619     }
1620     BREAKPOINT;
1621   } else {
1622     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1623   }
1624   assert(false, "DEBUG MESSAGE: %s", msg);
1625   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1626 }
1627 
1628 void MacroAssembler::unimplemented(const char* what) {
1629   const char* buf = NULL;
1630   {
1631     ResourceMark rm;
1632     stringStream ss;
1633     ss.print("unimplemented: %s", what);
1634     buf = code_string(ss.as_string());
1635   }
1636   stop(buf);
1637 }
1638 
1639 
1640 // Implementation of FixedSizeCodeBlock
1641 
1642 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1643 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1644 }
1645 
1646 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1647   if (_enabled) {
1648     address curr_pc = _masm->pc();
1649 
1650     assert(_start < curr_pc, "invalid current pc");
1651     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1652 
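         // Pad the remainder with nops so the emitted block occupies exactly
         // _size_in_instrs instructions.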
1653     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1654     for (int i = 0; i < nops_count; i++) {
1655       _masm->nop();
1656     }
1657   }
1658 }
1659 
1660 #ifdef AARCH64
1661 
1662 // Serializes memory.
1663 // The tmp register is not used on AArch64; this parameter is provided solely for better compatibility with 32-bit ARM
1664 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1665   if (!os::is_MP()) return;
1666 
1667   // TODO-AARCH64 investigate dsb vs dmb effects
1668   if (order_constraint == StoreStore) {
1669     dmb(DMB_st);
1670   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1671     dmb(DMB_ld);
1672   } else {
1673     dmb(DMB_all);
1674   }
1675 }
1676 
1677 #else
1678 
1679 // Serializes memory. Potentially blows flags and reg.
1680 // tmp is a scratch register for the v6 co-processor write op (may be noreg for other architecture versions)
1681 // preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve the status flags. Optional.
1682 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
1683 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1684                             Register tmp,
1685                             bool preserve_flags,
1686                             Register load_tgt) {
1687   if (!os::is_MP()) return;
1688 
1689   if (order_constraint == StoreStore) {
1690     dmb(DMB_st, tmp);
1691   } else if ((order_constraint & StoreLoad)  ||
1692              (order_constraint & LoadLoad)   ||
1693              (order_constraint & StoreStore) ||
1694              (load_tgt == noreg)             ||
1695              preserve_flags) {
1696     dmb(DMB_all, tmp);
1697   } else {
1698     // LoadStore: reordering of speculative stores is prohibited
1699 
1700     // By providing an ordered load target register, we avoid an extra memory load reference
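         // The never-taken conditional branch below creates a control dependency
         // from the completed load (load_tgt) to any following store: stores are
         // not performed speculatively past an unresolved branch, which yields
         // LoadStore ordering without a dmb.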
1701     Label not_taken;
1702     bind(not_taken);
1703     cmp(load_tgt, load_tgt);
1704     b(not_taken, ne);
1705   }
1706 }
1707 
1708 #endif // AARCH64
1709 
1710 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1711 // on failure, so fall-through can only mean success.
1712 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1713 // This is only needed for C2, which for some reason does not retry,
1714 // while C1/interpreter does.
1715 // TODO: measure if it makes a difference
1716 
1717 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1718   Register base, Register tmp, Label &slow_case,
1719   bool allow_fallthrough_on_failure, bool one_shot)
1720 {
1721 
1722   bool fallthrough_is_success = false;
1723 
1724   // ARM Litmus Test example does prefetching here.
1725   // TODO: investigate if it helps performance
1726 
1727   // The last store was to the displaced header, so to prevent
1728   // reordering we must issue a StoreStore or Release barrier before
1729   // the CAS store.
1730 
1731 #ifdef AARCH64
1732 
1733   Register Rscratch = tmp;
1734   Register Roop = base;
1735   Register mark = oldval;
1736   Register Rbox = newval;
1737   Label loop;
1738 
1739   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1740 
1741   // Instead of StoreStore here, we use store-release-exclusive below
1742 
1743   bind(loop);
1744 
1745   ldaxr(tmp, base);  // acquire
1746   cmp(tmp, oldval);
1747   b(slow_case, ne);
1748   stlxr(tmp, newval, base); // release
1749   if (one_shot) {
1750     cmp_w(tmp, 0);
1751   } else {
1752     cbnz_w(tmp, loop);
1753     fallthrough_is_success = true;
1754   }
1755 
1756   // MemBarAcquireLock would normally go here, but
1757   // we already do ldaxr+stlxr above, which has
1758   // Sequential Consistency
1759 
1760 #else
1761   membar(MacroAssembler::StoreStore, noreg);
1762 
1763   if (one_shot) {
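         // Single LL/SC attempt: strex writes 0 to tmp on success, so the final
         // conditional cmp leaves 'eq' set only if the compare matched and the
         // store succeeded.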
1764     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1765     cmp(tmp, oldval);
1766     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1767     cmp(tmp, 0, eq);
1768   } else {
1769     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1770   }
1771 
1772   // MemBarAcquireLock barrier
1773   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1774   // but that doesn't prevent a load or store from floating up between
1775   // the load and store in the CAS sequence, so play it safe and
1776   // do a full fence.
1777   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1778 #endif
1779   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1780     b(slow_case, ne);
1781   }
1782 }
1783 
1784 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1785   Register base, Register tmp, Label &slow_case,
1786   bool allow_fallthrough_on_failure, bool one_shot)
1787 {
1788 
1789   bool fallthrough_is_success = false;
1790 
1791   assert_different_registers(oldval,newval,base,tmp);
1792 
1793 #ifdef AARCH64
1794   Label loop;
1795 
1796   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1797 
1798   bind(loop);
1799   ldxr(tmp, base);
1800   cmp(tmp, oldval);
1801   b(slow_case, ne);
1802   // MemBarReleaseLock barrier
1803   stlxr(tmp, newval, base);
1804   if (one_shot) {
1805     cmp_w(tmp, 0);
1806   } else {
1807     cbnz_w(tmp, loop);
1808     fallthrough_is_success = true;
1809   }
1810 #else
1811   // MemBarReleaseLock barrier
1812   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1813   // but that doesn't prevent a load or store from floating down between
1814   // the load and store in the CAS sequence, so play it safe and
1815   // do a full fence.
1816   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1817 
1818   if (one_shot) {
1819     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1820     cmp(tmp, oldval);
1821     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1822     cmp(tmp, 0, eq);
1823   } else {
1824     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1825   }
1826 #endif
1827   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1828     b(slow_case, ne);
1829   }
1830 
1831   // ExitEnter
1832   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1833   // barrier that follows volatile store.
1834   // TODO: Should be able to remove on armv8 if volatile loads
1835   // use the load-acquire instruction.
1836   membar(StoreLoad, noreg);
1837 }
1838 
1839 #ifndef PRODUCT
1840 
1841 // Preserves flags and all registers.
1842 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1843 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1844   if (counter_addr != NULL) {
1845     InlinedAddress counter_addr_literal((address)counter_addr);
1846     Label done, retry;
1847     if (cond != al) {
1848       b(done, inverse(cond));
1849     }
1850 
1851 #ifdef AARCH64
1852     raw_push(R0, R1);
1853     raw_push(R2, ZR);
1854 
1855     ldr_literal(R0, counter_addr_literal);
1856 
1857     bind(retry);
1858     ldxr_w(R1, R0);
1859     add_w(R1, R1, 1);
1860     stxr_w(R2, R1, R0);
1861     cbnz_w(R2, retry);
1862 
1863     raw_pop(R2, ZR);
1864     raw_pop(R0, R1);
1865 #else
1866     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1867     ldr_literal(R0, counter_addr_literal);
1868 
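         // Save the condition flags in Rtemp: the CAS retry loop below clobbers
         // them, and this helper must preserve flags.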
1869     mrs(CPSR, Rtemp);
1870 
1871     bind(retry);
1872     ldr_s32(R1, Address(R0));
1873     add(R2, R1, 1);
1874     atomic_cas_bool(R1, R2, R0, 0, R3);
1875     b(retry, ne);
1876 
1877     msr(CPSR_fsxc, Rtemp);
1878 
1879     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1880 #endif // AARCH64
1881 
1882     b(done);
1883     bind_literal(counter_addr_literal);
1884 
1885     bind(done);
1886   }
1887 }
1888 
1889 #endif // !PRODUCT
1890 
1891 
1892 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1893 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1894 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1895                                                  Register tmp, Label& slow_case, int* counter_addr) {
1896 
1897   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1898 #ifdef ASSERT
1899   breakpoint(ne); // Fallthrough only on success
1900 #endif
1901 #ifndef PRODUCT
1902   if (counter_addr != NULL) {
1903     cond_atomic_inc32(al, counter_addr);
1904   }
1905 #endif // !PRODUCT
1906 }
1907 
1908 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1909                                          bool swap_reg_contains_mark,
1910                                          Register tmp2,
1911                                          Label& done, Label& slow_case,
1912                                          BiasedLockingCounters* counters) {
1913   // obj_reg must be preserved (at least) if the bias locking fails
1914   // tmp_reg is a temporary register
1915   // swap_reg was used as a temporary but contained a value
1916   //   that was used afterwards in some call paths. Callers
1917   //   have been fixed so that swap_reg no longer needs to be
1918   //   saved.
1919   // Rtemp is no longer scratched
1920 
1921   assert(UseBiasedLocking, "why call this otherwise?");
1922   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1923   guarantee(swap_reg!=tmp_reg, "invariant");
1924   assert(tmp_reg != noreg, "must supply tmp_reg");
1925 
1926 #ifndef PRODUCT
1927   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1928     counters = BiasedLocking::counters();
1929   }
1930 #endif
1931 
1932   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1933   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1934 
1935   // Biased locking
1936   // See whether the lock is currently biased toward our thread and
1937   // whether the epoch is still valid
1938   // Note that the runtime guarantees sufficient alignment of JavaThread
1939   // pointers to allow age to be placed into low bits
1940   // First check to see whether biasing is even enabled for this object
1941   Label cas_label;
1942 
1943   // The null check applies to the mark loading, if we need to load it.
1944   // If the mark has already been loaded in swap_reg then it has already
1945   // been performed and the offset is irrelevant.
1946   int null_check_offset = offset();
1947   if (!swap_reg_contains_mark) {
1948     ldr(swap_reg, mark_addr);
1949   }
1950 
1951   // On MP platforms, loads could return 'stale' values in some cases.
1952   // That is acceptable since either CAS or slow case path is taken in the worst case.
1953 
1954   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1955   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1956 
1957   b(cas_label, ne);
1958 
1959   // The bias pattern is present in the object's header. Need to check
1960   // whether the bias owner and the epoch are both still current.
1961   load_klass(tmp_reg, obj_reg);
1962   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1963   orr(tmp_reg, tmp_reg, Rthread);
1964   eor(tmp_reg, tmp_reg, swap_reg);
1965 
1966 #ifdef AARCH64
1967   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1968 #else
1969   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1970 #endif // AARCH64
1971 
1972 #ifndef PRODUCT
1973   if (counters != NULL) {
1974     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1975   }
1976 #endif // !PRODUCT
1977 
1978   b(done, eq);
1979 
1980   Label try_revoke_bias;
1981   Label try_rebias;
1982 
1983   // At this point we know that the header has the bias pattern and
1984   // that we are not the bias owner in the current epoch. We need to
1985   // figure out more details about the state of the header in order to
1986   // know what operations can be legally performed on the object's
1987   // header.
1988 
1989   // If the low three bits in the xor result aren't clear, that means
1990   // the prototype header is no longer biased and we have to revoke
1991   // the bias on this object.
1992   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1993   b(try_revoke_bias, ne);
1994 
1995   // Biasing is still enabled for this data type. See whether the
1996   // epoch of the current bias is still valid, meaning that the epoch
1997   // bits of the mark word are equal to the epoch bits of the
1998   // prototype header. (Note that the prototype header's epoch bits
1999   // only change at a safepoint.) If not, attempt to rebias the object
2000   // toward the current thread. Note that we must be absolutely sure
2001   // that the current epoch is invalid in order to do this because
2002   // otherwise the manipulations it performs on the mark word are
2003   // illegal.
2004   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
2005   b(try_rebias, ne);
2006 
2007   // tmp_reg has the age, epoch and pattern bits cleared
2008   // The remaining (owner) bits are (Thread ^ current_owner)
2009 
2010   // The epoch of the current bias is still valid but we know nothing
2011   // about the owner; it might be set or it might be clear. Try to
2012   // acquire the bias of the object using an atomic operation. If this
2013   // fails we will go in to the runtime to revoke the object's bias.
2014   // Note that we first construct the presumed unbiased header so we
2015   // don't accidentally blow away another thread's valid bias.
2016 
2017   // Note that we know the owner is not ourself. Hence, success can
2018   // only happen when the owner bits is 0
2019 
2020 #ifdef AARCH64
2021   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2022   // a cleared bit in the middle (the cms bit). So it is loaded with a separate instruction.
2023   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2024   andr(swap_reg, swap_reg, tmp2);
2025 #else
2026   // until the assembler can be made smarter, we need to make some assumptions about the values
2027   // so we can optimize this:
2028   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2029 
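       // Shifting left then right by 23 clears the upper 23 (thread owner) bits
       // of the 32-bit mark word, keeping only the low 9 bits checked above.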
2030   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2031   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2032 #endif // AARCH64
2033 
2034   orr(tmp_reg, swap_reg, Rthread); // new mark
2035 
2036   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2037         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2038 
2039   // If the biasing toward our thread failed, this means that
2040   // another thread succeeded in biasing it toward itself and we
2041   // need to revoke that bias. The revocation will occur in the
2042   // interpreter runtime in the slow case.
2043 
2044   b(done);
2045 
2046   bind(try_rebias);
2047 
2048   // At this point we know the epoch has expired, meaning that the
2049   // current "bias owner", if any, is actually invalid. Under these
2050   // circumstances _only_, we are allowed to use the current header's
2051   // value as the comparison value when doing the cas to acquire the
2052   // bias in the current epoch. In other words, we allow transfer of
2053   // the bias from one thread to another directly in this situation.
2054 
2055   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2056 
2057   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2058 
2059   // owner bits 'random'. Set them to Rthread.
2060 #ifdef AARCH64
2061   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2062   andr(tmp_reg, tmp_reg, tmp2);
2063 #else
2064   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2065   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2066 #endif // AARCH64
2067 
2068   orr(tmp_reg, tmp_reg, Rthread); // new mark
2069 
2070   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2071         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2072 
2073   // If the biasing toward our thread failed, then another thread
2074   // succeeded in biasing it toward itself and we need to revoke that
2075   // bias. The revocation will occur in the runtime in the slow case.
2076 
2077   b(done);
2078 
2079   bind(try_revoke_bias);
2080 
2081   // The prototype mark in the klass doesn't have the bias bit set any
2082   // more, indicating that objects of this data type are not supposed
2083   // to be biased any more. We are going to try to reset the mark of
2084   // this object to the prototype value and fall through to the
2085   // CAS-based locking scheme. Note that if our CAS fails, it means
2086   // that another thread raced us for the privilege of revoking the
2087   // bias of this particular object, so it's okay to continue in the
2088   // normal locking code.
2089 
2090   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2091 
2092   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2093 
2094   // owner bits 'random'. Clear them
2095 #ifdef AARCH64
2096   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2097   andr(tmp_reg, tmp_reg, tmp2);
2098 #else
2099   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2100   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2101 #endif // AARCH64
2102 
2103   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2104         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2105 
2106   // Fall through to the normal CAS-based lock, because no matter what
2107   // the result of the above CAS, some thread must have succeeded in
2108   // removing the bias bit from the object's header.
2109 
2110   bind(cas_label);
2111 
2112   return null_check_offset;
2113 }
2114 
2115 
2116 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2117   assert(UseBiasedLocking, "why call this otherwise?");
2118 
2119   // Check for biased locking unlock case, which is a no-op
2120   // Note: we do not have to check the thread ID for two reasons.
2121   // First, the interpreter checks for IllegalMonitorStateException at
2122   // a higher level. Second, if the bias was revoked while we held the
2123   // lock, the object could not be rebiased toward another thread, so
2124   // the bias bit would be clear.
2125   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2126 
2127   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2128   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2129   b(done, eq);
2130 }
2131 
2132 
2133 void MacroAssembler::resolve_jobject(Register value,
2134                                      Register tmp1,
2135                                      Register tmp2) {
2136   assert_different_registers(value, tmp1, tmp2);
2137   Label done, not_weak;
2138   cbz(value, done);             // Use NULL as-is.
2139   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2140   tbz(value, 0, not_weak);      // Test for jweak tag.
2141 
2142   // Resolve jweak.
2143   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
2144                  Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
2145   b(done);
2146   bind(not_weak);
2147   // Resolve (untagged) jobject.
2148   access_load_at(T_OBJECT, IN_NATIVE,
2149                  Address(value, 0), value, tmp1, tmp2, noreg);
2150   verify_oop(value);
2151   bind(done);
2152 }
2153 
2154 
2155 //////////////////////////////////////////////////////////////////////////////////
2156 
2157 #ifdef AARCH64
2158 
2159 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2160   switch (size_in_bytes) {
2161     case  8: ldr(dst, src); break;
2162     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2163     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2164     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2165     default: ShouldNotReachHere();
2166   }
2167 }
2168 
2169 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2170   switch (size_in_bytes) {
2171     case  8: str(src, dst);    break;
2172     case  4: str_32(src, dst); break;
2173     case  2: strh(src, dst);   break;
2174     case  1: strb(src, dst);   break;
2175     default: ShouldNotReachHere();
2176   }
2177 }
2178 
2179 #else
2180 
2181 void MacroAssembler::load_sized_value(Register dst, Address src,
2182                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2183   switch (size_in_bytes) {
2184     case  4: ldr(dst, src, cond); break;
2185     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2186     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2187     default: ShouldNotReachHere();
2188   }
2189 }
2190 
2191 
2192 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2193   switch (size_in_bytes) {
2194     case  4: str(src, dst, cond); break;
2195     case  2: strh(src, dst, cond);   break;
2196     case  1: strb(src, dst, cond);   break;
2197     default: ShouldNotReachHere();
2198   }
2199 }
2200 #endif // AARCH64
2201 
2202 // Look up the method for a megamorphic invokeinterface call.
2203 // The target method is determined by <Rinterf, Rindex>.
2204 // The receiver klass is in Rklass.
2205 // On success, the result will be in method_result, and execution falls through.
2206 // On failure, execution transfers to the given label.
2207 void MacroAssembler::lookup_interface_method(Register Rklass,
2208                                              Register Rintf,
2209                                              RegisterOrConstant itable_index,
2210                                              Register method_result,
2211                                              Register Rscan,
2212                                              Register Rtmp,
2213                                              Label& L_no_such_interface) {
2214 
2215   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2216 
2217   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2218   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2219 
2220   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2221   const int base = in_bytes(Klass::vtable_start_offset());
2222   const int scale = exact_log2(vtableEntry::size_in_bytes());
2223   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2224   add(Rscan, Rklass, base);
2225   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2226 
2227   // Search through the itable for an interface equal to incoming Rintf
2228   // itable looks like [intface][offset][intface][offset][intface][offset]
2229 
2230   Label loop;
2231   bind(loop);
2232   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2233 #ifdef AARCH64
2234   Label found;
2235   cmp(Rtmp, Rintf);
2236   b(found, eq);
2237   cbnz(Rtmp, loop);
2238 #else
2239   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2240   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2241   b(loop, ne);
2242 #endif // AARCH64
2243 
2244 #ifdef AARCH64
2245   b(L_no_such_interface);
2246   bind(found);
2247 #else
2248   // CF == 0 means we reached the end of the itable without finding the interface
2249   b(L_no_such_interface, cc);
2250 #endif // !AARCH64
2251 
2252   if (method_result != noreg) {
2253     // Interface found at previous position of Rscan, now load the method
2254     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2255     if (itable_index.is_register()) {
2256       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2257       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2258       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2259       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2260     } else {
2261       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2262                           itableMethodEntry::method_offset_in_bytes();
2263       add_slow(method_result, Rklass, method_offset);
2264       ldr(method_result, Address(method_result, Rtmp));
2265     }
2266   }
2267 }
2268 
2269 #ifdef COMPILER2
2270 // TODO: 8 bytes at a time? pre-fetch?
2271 // Compare char[] arrays aligned to 4 bytes.
2272 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2273                                         Register limit, Register result,
2274                                         Register chr1, Register chr2, Label& Ldone) {
2275   Label Lvector, Lloop;
2276 
2277   // if (ary1 == ary2)
2278   //     return true; 
2279   cmpoop(ary1, ary2);
2280   b(Ldone, eq);
2281 
2282   // Note: limit contains number of bytes (2*char_elements) != 0.
2283   tst(limit, 0x2); // trailing character ?
2284   b(Lvector, eq);
2285 
2286   // compare the trailing char
2287   sub(limit, limit, sizeof(jchar));
2288   ldrh(chr1, Address(ary1, limit));
2289   ldrh(chr2, Address(ary2, limit));
2290   cmp(chr1, chr2);
2291   mov(result, 0, ne);     // not equal
2292   b(Ldone, ne);
2293 
2294   // only one char ?
2295   tst(limit, limit);
2296   mov(result, 1, eq);
2297   b(Ldone, eq);
2298 
2299   // word by word compare, don't need alignment check
2300   bind(Lvector);
2301 
2302   // Shift ary1 and ary2 to the end of the arrays, negate limit
2303   add(ary1, limit, ary1);
2304   add(ary2, limit, ary2);
2305   neg(limit, limit);
2306 
2307   bind(Lloop);
2308   ldr_u32(chr1, Address(ary1, limit));
2309   ldr_u32(chr2, Address(ary2, limit));
2310   cmp_32(chr1, chr2);
2311   mov(result, 0, ne);     // not equal
2312   b(Ldone, ne);
2313   adds(limit, limit, 2*sizeof(jchar));
2314   b(Lloop, ne);
2315 
2316   // Caller should set it:
2317   // mov(result_reg, 1);  //equal
2318 }
2319 #endif
2320 
2321 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2322   mov_slow(tmpreg1, counter_addr);
2323   ldr_s32(tmpreg2, tmpreg1);
2324   add_32(tmpreg2, tmpreg2, 1);
2325   str_32(tmpreg2, tmpreg1);
2326 }
2327 
2328 void MacroAssembler::floating_cmp(Register dst) {
2329 #ifdef AARCH64
2330   NOT_TESTED();
2331   cset(dst, gt);            // 1 if '>', else 0
2332   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2333 #else
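       // Branch-free conversion of the FPSCR compare flags (N, Z, C, V in bits
       // 31..28) into -1/0/+1: after the eor, bit 31 = N^V ("less than") and
       // bit 30 = !Z ("not equal"); asr #30 then gives -1 for less/unordered,
       // 0 for equal and +1 for greater.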
2334   vmrs(dst, FPSCR);
2335   orr(dst, dst, 0x08000000);
2336   eor(dst, dst, AsmOperand(dst, lsl, 3));
2337   mov(dst, AsmOperand(dst, asr, 30));
2338 #endif
2339 }
2340 
2341 void MacroAssembler::restore_default_fp_mode() {
2342 #ifdef AARCH64
2343   msr(SysReg_FPCR, ZR);
2344 #else
2345 #ifndef __SOFTFP__
2346   // Round to Near mode, IEEE compatible, masked exceptions
2347   mov(Rtemp, 0);
2348   vmsr(FPSCR, Rtemp);
2349 #endif // !__SOFTFP__
2350 #endif // AARCH64
2351 }
2352 
2353 #ifndef AARCH64
2354 // 24-bit word range == 26-bit byte range
2355 bool check26(int offset) {
2356   // this could be simplified, but it mimics encoding and decoding
2357   // an actual branch instruction
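       // A branch stores a signed 24-bit word offset; round-trip the byte offset
       // through that encoding and check that nothing was lost.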
2358   int off1 = offset << 6 >> 8;
2359   int encoded = off1 & ((1<<24)-1);
2360   int decoded = encoded << 8 >> 6;
2361   return offset == decoded;
2362 }
2363 #endif // !AARCH64
2364 
2365 // Perform some slight adjustments so the default 32MB code cache
2366 // is fully reachable.
2367 static inline address first_cache_address() {
2368   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2369 }
2370 static inline address last_cache_address() {
2371   return CodeCache::high_bound() - Assembler::InstructionSize;
2372 }
2373 
2374 #ifdef AARCH64
2375 // Can we reach target using ADRP?
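     // ADRP covers a signed 21-bit page (4KB) offset, i.e. roughly +/-4GB from the current page.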
2376 bool MacroAssembler::page_reachable_from_cache(address target) {
2377   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2378   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2379   intptr_t addr = (intptr_t)target & ~0xfff;
2380 
2381   intptr_t loffset = addr - cl;
2382   intptr_t hoffset = addr - ch;
2383   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2384 }
2385 #endif
2386 
2387 // Can we reach target using unconditional branch or call from anywhere
2388 // in the code cache (because code can be relocated)?
2389 bool MacroAssembler::_reachable_from_cache(address target) {
2390 #ifdef __thumb__
2391   if ((1 & (intptr_t)target) != 0) {
2392     // Return false to avoid 'b' if we need switching to THUMB mode.
2393     return false;
2394   }
2395 #endif
2396 
2397   address cl = first_cache_address();
2398   address ch = last_cache_address();
2399 
2400   if (ForceUnreachable) {
2401     // Only addresses from CodeCache can be treated as reachable.
2402     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2403       return false;
2404     }
2405   }
2406 
2407   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2408   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2409 
2410 #ifdef AARCH64
2411   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2412 #else
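       // The -8 accounts for the ARM PC reading two instructions (8 bytes) ahead
       // of the branch being executed.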
2413   return check26(loffset - 8) && check26(hoffset - 8);
2414 #endif
2415 }
2416 
2417 bool MacroAssembler::reachable_from_cache(address target) {
2418   assert(CodeCache::contains(pc()), "not supported");
2419   return _reachable_from_cache(target);
2420 }
2421 
2422 // Can we reach the entire code cache from anywhere else in the code cache?
2423 bool MacroAssembler::_cache_fully_reachable() {
2424   address cl = first_cache_address();
2425   address ch = last_cache_address();
2426   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2427 }
2428 
2429 bool MacroAssembler::cache_fully_reachable() {
2430   assert(CodeCache::contains(pc()), "not supported");
2431   return _cache_fully_reachable();
2432 }
2433 
2434 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2435   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2436   if (reachable_from_cache(target)) {
2437     relocate(rtype);
2438     b(target NOT_AARCH64_ARG(cond));
2439     return;
2440   }
2441 
2442   // Note: relocate is not needed for the code below,
2443   // since it encodes targets in absolute format.
2444   if (ignore_non_patchable_relocations()) {
2445     rtype = relocInfo::none;
2446   }
2447 
2448 #ifdef AARCH64
2449   assert (scratch != noreg, "should be specified");
2450   InlinedAddress address_literal(target, rtype);
2451   ldr_literal(scratch, address_literal);
2452   br(scratch);
2453   int off = offset();
2454   bind_literal(address_literal);
2455 #ifdef COMPILER2
2456   if (offset() - off == wordSize) {
2457     // no padding, so insert nop for worst-case sizing
2458     nop();
2459   }
2460 #endif
2461 #else
2462   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2463     // Note: this version cannot be (atomically) patched
2464     mov_slow(scratch, (intptr_t)target, cond);
2465     bx(scratch, cond);
2466   } else {
2467     Label skip;
2468     InlinedAddress address_literal(target);
2469     if (cond != al) {
2470       b(skip, inverse(cond));
2471     }
2472     relocate(rtype);
2473     ldr_literal(PC, address_literal);
2474     bind_literal(address_literal);
2475     bind(skip);
2476   }
2477 #endif // AARCH64
2478 }
2479 
2480 // Similar to jump except that:
2481 // - near calls are valid only if any destination in the cache is near
2482 // - no movt/movw (not atomically patchable)
2483 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2484   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2485   if (cache_fully_reachable()) {
2486     // Note: this assumes that all possible targets (the initial one
2487   // and the addresses patched to) are all in the code cache.
2488     assert(CodeCache::contains(target), "target might be too far");
2489     relocate(rtype);
2490     b(target NOT_AARCH64_ARG(cond));
2491     return;
2492   }
2493 
2494   // Discard the relocation information if not needed for CacheCompiledCode
2495   // since the next encodings are all in absolute format.
2496   if (ignore_non_patchable_relocations()) {
2497     rtype = relocInfo::none;
2498   }
2499 
2500 #ifdef AARCH64
2501   assert (scratch != noreg, "should be specified");
2502   InlinedAddress address_literal(target);
2503   relocate(rtype);
2504   ldr_literal(scratch, address_literal);
2505   br(scratch);
2506   int off = offset();
2507   bind_literal(address_literal);
2508 #ifdef COMPILER2
2509   if (offset() - off == wordSize) {
2510     // no padding, so insert nop for worst-case sizing
2511     nop();
2512   }
2513 #endif
2514 #else
2515   {
2516     Label skip;
2517     InlinedAddress address_literal(target);
2518     if (cond != al) {
2519       b(skip, inverse(cond));
2520     }
2521     relocate(rtype);
2522     ldr_literal(PC, address_literal);
2523     bind_literal(address_literal);
2524     bind(skip);
2525   }
2526 #endif // AARCH64
2527 }
2528 
2529 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2530   Register scratch = LR;
2531   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2532   if (reachable_from_cache(target)) {
2533     relocate(rspec);
2534     bl(target NOT_AARCH64_ARG(cond));
2535     return;
2536   }
2537 
2538   // Note: relocate is not needed for the code below,
2539   // since it encodes targets in absolute format.
2540   if (ignore_non_patchable_relocations()) {
2541     // This assumes the information was needed only for relocating the code.
2542     rspec = RelocationHolder::none;
2543   }
2544 
2545 #ifndef AARCH64
2546   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2547     // Note: this version cannot be (atomically) patched
2548     mov_slow(scratch, (intptr_t)target, cond);
2549     blx(scratch, cond);
2550     return;
2551   }
2552 #endif
2553 
2554   {
2555     Label ret_addr;
2556 #ifndef AARCH64
2557     if (cond != al) {
2558       b(ret_addr, inverse(cond));
2559     }
2560 #endif
2561 
2562 
2563 #ifdef AARCH64
2564     // TODO-AARCH64: make more optimal implementation
2565     // [ Keep in sync with MacroAssembler::call_size ]
2566     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2567     mov_slow(scratch, target);
2568     blr(scratch);
2569 #else
2570     InlinedAddress address_literal(target);
2571     relocate(rspec);
2572     adr(LR, ret_addr);
2573     ldr_literal(PC, address_literal);
2574 
2575     bind_literal(address_literal);
2576     bind(ret_addr);
2577 #endif
2578   }
2579 }
2580 
2581 #if defined(AARCH64) && defined(COMPILER2)
2582 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2583   // FIXME: mov_slow is variable-length
2584   if (!far) return 1; // bl
2585   if (patchable) return 2;  // ldr; blr
2586   return instr_count_for_mov_slow((intptr_t)target) + 1;
2587 }
2588 #endif
2589 
2590 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2591   assert(rspec.type() == relocInfo::static_call_type ||
2592          rspec.type() == relocInfo::none ||
2593          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2594 
2595   // Always generate the relocation information, needed for patching
2596   relocate(rspec); // used by NativeCall::is_call_before()
2597   if (cache_fully_reachable()) {
2598     // Note: this assumes that all possible targets (the initial one
2599     // and the addresses patched to) are all in the code cache.
2600     assert(CodeCache::contains(target), "target might be too far");
2601     bl(target);
2602   } else {
2603 #if defined(AARCH64) && defined(COMPILER2)
2604     if (c2) {
2605       // return address needs to match call_size().
2606       // no need to trash Rtemp
2607       int off = offset();
2608       Label skip_literal;
2609       InlinedAddress address_literal(target);
2610       ldr_literal(LR, address_literal);
2611       blr(LR);
2612       int ret_addr_offset = offset();
2613       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2614       b(skip_literal);
2615       int off2 = offset();
2616       bind_literal(address_literal);
2617       if (offset() - off2 == wordSize) {
2618         // no padding, so insert nop for worst-case sizing
2619         nop();
2620       }
2621       bind(skip_literal);
2622       return ret_addr_offset;
2623     }
2624 #endif
2625     Label ret_addr;
2626     InlinedAddress address_literal(target);
2627 #ifdef AARCH64
2628     ldr_literal(Rtemp, address_literal);
2629     adr(LR, ret_addr);
2630     br(Rtemp);
2631 #else
2632     adr(LR, ret_addr);
2633     ldr_literal(PC, address_literal);
2634 #endif
2635     bind_literal(address_literal);
2636     bind(ret_addr);
2637   }
2638   return offset();
2639 }
2640 
2641 // ((OopHandle)result).resolve();
2642 void MacroAssembler::resolve_oop_handle(Register result) {
2643   // OopHandle::resolve is an indirection.
2644   ldr(result, Address(result, 0));
2645 }
2646 
2647 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2648   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
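       // Walk Method* -> ConstMethod* -> ConstantPool* -> pool holder Klass*,
       // then load and resolve the klass's java mirror OopHandle.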
2649   ldr(tmp, Address(method, Method::const_offset()));
2650   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2651   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2652   ldr(mirror, Address(tmp, mirror_offset));
2653   resolve_oop_handle(mirror);
2654 }
2655 
2656 
2657 ///////////////////////////////////////////////////////////////////////////////
2658 
2659 // Compressed pointers
2660 
2661 #ifdef AARCH64
2662 
2663 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2664   if (UseCompressedClassPointers) {
2665     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2666     decode_klass_not_null(dst_klass);
2667   } else {
2668     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2669   }
2670 }
2671 
2672 #else
2673 
2674 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2675   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2676 }
2677 
2678 #endif // AARCH64
2679 
2680 // Blows src_klass.
2681 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2682 #ifdef AARCH64
2683   if (UseCompressedClassPointers) {
2684     assert(src_klass != dst_oop, "not enough registers");
2685     encode_klass_not_null(src_klass);
2686     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2687     return;
2688   }
2689 #endif // AARCH64
2690   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2691 }
2692 
2693 #ifdef AARCH64
2694 
2695 void MacroAssembler::store_klass_gap(Register dst) {
2696   if (UseCompressedClassPointers) {
2697     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2698   }
2699 }
2700 
2701 #endif // AARCH64
2702 
2703 
2704 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2705   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
2706 }
2707 
2708 // Blows src and flags.
2709 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2710   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2711 }
2712 
2713 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2714   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2715 }
2716 
2717 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2718                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2719   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2720   decorators = AccessInternal::decorator_fixup(decorators);
2721   bool as_raw = (decorators & AS_RAW) != 0;
2722   if (as_raw) {
2723     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2724   } else {
2725     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2726   }
2727 }
2728 
2729 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2730                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2731   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2732   decorators = AccessInternal::decorator_fixup(decorators);
2733   bool as_raw = (decorators & AS_RAW) != 0;
2734   if (as_raw) {
2735     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2736   } else {
2737     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2738   }
2739 }
2740 
2741 
2742 #ifdef AARCH64
2743 
2744 // Algorithm must match oop.inline.hpp encode_heap_oop.
2745 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2746   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2747   // Update it at modifications.
2748   assert (UseCompressedOops, "must be compressed");
2749   assert (Universe::heap() != NULL, "java heap should be initialized");
2750 #ifdef ASSERT
2751   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2752 #endif
2753   verify_oop(src);
2754   if (Universe::narrow_oop_base() == NULL) {
2755     if (Universe::narrow_oop_shift() != 0) {
2756       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2757       _lsr(dst, src, Universe::narrow_oop_shift());
2758     } else if (dst != src) {
2759       mov(dst, src);
2760     }
2761   } else {
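         // NULL must encode to 0: when src is NULL, select Rheap_base so the
         // subtraction below yields 0.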
2762     tst(src, src);
2763     csel(dst, Rheap_base, src, eq);
2764     sub(dst, dst, Rheap_base);
2765     if (Universe::narrow_oop_shift() != 0) {
2766       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2767       _lsr(dst, dst, Universe::narrow_oop_shift());
2768     }
2769   }
2770 }
2771 
2772 // Same algorithm as oop.inline.hpp decode_heap_oop.
2773 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2774 #ifdef ASSERT
2775   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2776 #endif
2777   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2778   if (Universe::narrow_oop_base() != NULL) {
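         // A narrow oop of 0 must decode back to NULL: the csel selects ZR when src is 0.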
2779     tst(src, src);
2780     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2781     csel(dst, dst, ZR, ne);
2782   } else {
2783     _lsl(dst, src, Universe::narrow_oop_shift());
2784   }
2785   verify_oop(dst);
2786 }
2787 
2788 #ifdef COMPILER2
2789 // Algorithm must match oop.inline.hpp encode_heap_oop.
2790 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2791 // must be changed.
2792 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2793   assert (UseCompressedOops, "must be compressed");
2794   assert (Universe::heap() != NULL, "java heap should be initialized");
2795 #ifdef ASSERT
2796   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2797 #endif
2798   verify_oop(src);
2799   if (Universe::narrow_oop_base() == NULL) {
2800     if (Universe::narrow_oop_shift() != 0) {
2801       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2802       _lsr(dst, src, Universe::narrow_oop_shift());
2803     } else if (dst != src) {
2804       mov(dst, src);
2805     }
2806   } else {
2807     sub(dst, src, Rheap_base);
2808     if (Universe::narrow_oop_shift() != 0) {
2809       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2810       _lsr(dst, dst, Universe::narrow_oop_shift());
2811     }
2812   }
2813 }
2814 
2815 // Same algorithm as oop.inline.hpp decode_heap_oop.
2816 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2817 // must be changed.
2818 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2819 #ifdef ASSERT
2820   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2821 #endif
2822   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2823   if (Universe::narrow_oop_base() != NULL) {
2824     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2825   } else {
2826     _lsl(dst, src, Universe::narrow_oop_shift());
2827   }
2828   verify_oop(dst);
2829 }
2830 
2831 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2832   assert(UseCompressedClassPointers, "should only be used for compressed header");
2833   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2834   int klass_index = oop_recorder()->find_index(k);
2835   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2836 
2837   // Relocation with special format (see relocInfo_arm.hpp).
2838   relocate(rspec);
2839   narrowKlass encoded_k = Klass::encode_klass(k);
2840   movz(dst, encoded_k & 0xffff, 0);
2841   movk(dst, (encoded_k >> 16) & 0xffff, 16);
2842 }
2843 
2844 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2845   assert(UseCompressedOops, "should only be used for compressed header");
2846   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2847   int oop_index = oop_recorder()->find_index(obj);
2848   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2849 
2850   relocate(rspec);
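       // The 0xffff halves are placeholders; the actual compressed oop value is
       // expected to be patched into this movz/movk pair later via the oop
       // relocation emitted above.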
2851   movz(dst, 0xffff, 0);
2852   movk(dst, 0xffff, 16);
2853 }
2854 
2855 #endif // COMPILER2
2856 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2857 // must be changed.
2858 void MacroAssembler::encode_klass_not_null(Register r) {
2859   if (Universe::narrow_klass_base() != NULL) {
2860     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2861     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
2862     mov_slow(Rheap_base, Universe::narrow_klass_base());
2863     sub(r, r, Rheap_base);
2864   }
2865   if (Universe::narrow_klass_shift() != 0) {
2866     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2867     _lsr(r, r, Universe::narrow_klass_shift());
2868   }
2869   if (Universe::narrow_klass_base() != NULL) {
2870     reinit_heapbase();
2871   }
2872 }
2873 
2874 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2875 // must be changed.
2876 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2877   if (dst == src) {
2878     encode_klass_not_null(src);
2879     return;
2880   }
2881   if (Universe::narrow_klass_base() != NULL) {
2882     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
2883     sub(dst, src, dst);
2884     if (Universe::narrow_klass_shift() != 0) {
2885       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2886       _lsr(dst, dst, Universe::narrow_klass_shift());
2887     }
2888   } else {
2889     if (Universe::narrow_klass_shift() != 0) {
2890       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2891       _lsr(dst, src, Universe::narrow_klass_shift());
2892     } else {
2893       mov(dst, src);
2894     }
2895   }
2896 }
2897 
2898 // Function instr_count_for_decode_klass_not_null() counts the instructions
2899 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
2900 // when (Universe::heap() != NULL).  Hence, if the instructions they
2901 // generate change, then this method needs to be updated.
2902 int MacroAssembler::instr_count_for_decode_klass_not_null() {
2903   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
2904   assert(Universe::heap() != NULL, "java heap should be initialized");
2905   if (Universe::narrow_klass_base() != NULL) {
2906     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
2907       1 +                                                                 // add
2908       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
2909   } else {
2910     if (Universe::narrow_klass_shift() != 0) {
2911       return 1;
2912     }
2913   }
2914   return 0;
2915 }
2916 
2917 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2918 // must be changed.
2919 void MacroAssembler::decode_klass_not_null(Register r) {
2920   int off = offset();
2921   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2922   assert(Universe::heap() != NULL, "java heap should be initialized");
2923   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
2924   // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
2925   // Also do not verify_oop as this is called by verify_oop.
2926   if (Universe::narrow_klass_base() != NULL) {
2927     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2928     mov_slow(Rheap_base, Universe::narrow_klass_base());
2929     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
2930     reinit_heapbase();
2931   } else {
2932     if (Universe::narrow_klass_shift() != 0) {
2933       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2934       _lsl(r, r, Universe::narrow_klass_shift());
2935     }
2936   }
2937   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
2938 }
2939 
2940 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2941 // must be changed.
2942 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2943   if (src == dst) {
2944     decode_klass_not_null(src);
2945     return;
2946   }
2947 
2948   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2949   assert(Universe::heap() != NULL, "java heap should be initialized");
2950   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
2951   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
2952   // Also do not verify_oop as this is called by verify_oop.
2953   if (Universe::narrow_klass_base() != NULL) {
2954     mov_slow(dst, Universe::narrow_klass_base());
2955     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
2956   } else {
2957     _lsl(dst, src, Universe::narrow_klass_shift());
2958   }
2959 }
2960 
2961 
2962 void MacroAssembler::reinit_heapbase() {
2963   if (UseCompressedOops || UseCompressedClassPointers) {
2964     if (Universe::heap() != NULL) {
2965       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
2966     } else {
2967       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
2968     }
2969   }
2970 }
2971 
2972 #ifdef ASSERT
2973 void MacroAssembler::verify_heapbase(const char* msg) {
2974   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
2975   // Update it at modifications.
2976   assert (UseCompressedOops, "should be compressed");
2977   assert (Universe::heap() != NULL, "java heap should be initialized");
2978   if (CheckCompressedOops) {
2979     Label ok;
2980     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2981     raw_push(Rtemp, ZR);
2982     mrs(Rtemp, Assembler::SysReg_NZCV);
2983     str(Rtemp, Address(SP, 1 * wordSize));
2984     mov_slow(Rtemp, Universe::narrow_ptrs_base());
2985     cmp(Rheap_base, Rtemp);
2986     b(ok, eq);
2987     stop(msg);
2988     bind(ok);
2989     ldr(Rtemp, Address(SP, 1 * wordSize));
2990     msr(Assembler::SysReg_NZCV, Rtemp);
2991     raw_pop(Rtemp, ZR);
2992     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2993   }
2994 }
2995 #endif // ASSERT
2996 
2997 #endif // AARCH64
2998 
2999 #ifdef COMPILER2
3000 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3001 {
3002   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3003 
3004   Register Rmark      = Rscratch2;
3005 
3006   assert(Roop != Rscratch, "");
3007   assert(Roop != Rmark, "");
3008   assert(Rbox != Rscratch, "");
3009   assert(Rbox != Rmark, "");
3010 
3011   Label fast_lock, done;
3012 
3013   if (UseBiasedLocking && !UseOptoBiasInlining) {
3014     Label failed;
3015 #ifdef AARCH64
3016     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
3017 #else
3018     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3019 #endif
3020     bind(failed);
3021   }
3022 
3023   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3024   tst(Rmark, markOopDesc::unlocked_value);
3025   b(fast_lock, ne);
3026 
3027   // Check for recursive lock
3028   // See comments in InterpreterMacroAssembler::lock_object for
3029   // explanations on the fast recursive locking check.
3030 #ifdef AARCH64
3031   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3032   Assembler::LogicalImmediate imm(mask, false);
3033   mov(Rscratch, SP);
3034   sub(Rscratch, Rmark, Rscratch);
3035   ands(Rscratch, Rscratch, imm);
3036   // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
3037   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3038   b(done);
3039 
3040 #else
3041   // -1- test low 2 bits
3042   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3043   // -2- test (hdr - SP) if the low two bits are 0
3044   sub(Rscratch, Rmark, SP, eq);
3045   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3046   // If still 'eq' then recursive locking OK
3047   // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107)
3048   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3049   b(done);
3050 #endif
3051 
3052   bind(fast_lock);
3053   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3054 
3055   bool allow_fallthrough_on_failure = true;
3056   bool one_shot = true;
3057   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3058 
3059   bind(done);
3060 
3061 }
3062 
3063 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
3064 {
3065   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3066 
3067   Register Rmark      = Rscratch2;
3068 
3069   assert(Roop != Rscratch, "");
3070   assert(Roop != Rmark, "");
3071   assert(Rbox != Rscratch, "");
3072   assert(Rbox != Rmark, "");
3073 
3074   Label done;
3075 
3076   if (UseBiasedLocking && !UseOptoBiasInlining) {
3077     biased_locking_exit(Roop, Rscratch, done);
3078   }
3079 
3080   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3081   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3082   cmp(Rmark, 0);
3083   b(done, eq);
3084 
3085   // Restore the object header
3086   bool allow_fallthrough_on_failure = true;
3087   bool one_shot = true;
3088   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3089 
3090   bind(done);
3091 
3092 }
3093 #endif // COMPILER2