1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "gc/shared/cardTableBarrierSet.hpp"
  36 #include "gc/shared/collectedHeap.inline.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/klass.inline.hpp"
  40 #include "prims/methodHandles.hpp"
  41 #include "runtime/biasedLocking.hpp"
  42 #include "runtime/interfaceSupport.inline.hpp"
  43 #include "runtime/objectMonitor.hpp"
  44 #include "runtime/os.hpp"
  45 #include "runtime/sharedRuntime.hpp"
  46 #include "runtime/stubRoutines.hpp"
  47 #include "utilities/macros.hpp"
  48 #if INCLUDE_ALL_GCS
  49 #include "gc/g1/g1BarrierSet.hpp"
  50 #include "gc/g1/g1CardTable.hpp"
  51 #include "gc/g1/g1ThreadLocalData.hpp"
  52 #include "gc/g1/heapRegion.hpp"
  53 #endif
  54 
  55 // Implementation of AddressLiteral
  56 
  57 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  58   switch (rtype) {
  59   case relocInfo::oop_type:
  60     // Oops are a special case. Normally they would be their own section
  61     // but in cases like icBuffer they are literals in the code stream that
  62     // we don't have a section for. We use none so that we get a literal address
  63     // which is always patchable.
  64     break;
  65   case relocInfo::external_word_type:
  66     _rspec = external_word_Relocation::spec(_target);
  67     break;
  68   case relocInfo::internal_word_type:
  69     _rspec = internal_word_Relocation::spec(_target);
  70     break;
  71   case relocInfo::opt_virtual_call_type:
  72     _rspec = opt_virtual_call_Relocation::spec();
  73     break;
  74   case relocInfo::static_call_type:
  75     _rspec = static_call_Relocation::spec();
  76     break;
  77   case relocInfo::runtime_call_type:
  78     _rspec = runtime_call_Relocation::spec();
  79     break;
  80   case relocInfo::poll_type:
  81   case relocInfo::poll_return_type:
  82     _rspec = Relocation::spec_simple(rtype);
  83     break;
  84   case relocInfo::none:
  85     break;
  86   default:
  87     ShouldNotReachHere();
  88     break;
  89   }
  90 }
  91 
  92 // Initially added to the Assembler interface as a pure virtual:
  93 //   RegisterConstant delayed_value(..)
  94 // for:
  95 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
  96 // this was subsequently modified to its present name and return type
  97 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  98                                                       Register tmp,
  99                                                       int offset) {
 100   ShouldNotReachHere();
 101   return RegisterOrConstant(-1);
 102 }
 103 
 104 
 105 #ifdef AARCH64
 106 // Note: ARM32 version is OS dependent
 107 void MacroAssembler::breakpoint(AsmCondition cond) {
 108   if (cond == al) {
 109     brk();
 110   } else {
 111     Label L;
 112     b(L, inverse(cond));
 113     brk();
 114     bind(L);
 115   }
 116 }
 117 #endif // AARCH64
 118 
 119 
 120 // virtual method calling
 121 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 122                                            Register vtable_index,
 123                                            Register method_result) {
 124   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 125   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
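  // method_result = *(recv_klass + vtable_start_offset + vtable_index * wordSize + method_offset)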
 126   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 127   ldr(method_result, Address(recv_klass, base_offset));
 128 }
 129 
 130 
 131 // Simplified, combined version, good for typical uses.
 132 // Falls through on failure.
 133 void MacroAssembler::check_klass_subtype(Register sub_klass,
 134                                          Register super_klass,
 135                                          Register temp_reg,
 136                                          Register temp_reg2,
 137                                          Register temp_reg3,
 138                                          Label& L_success) {
 139   Label L_failure;
 140   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 141   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 142   bind(L_failure);
}
 144 
 145 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 146                                                    Register super_klass,
 147                                                    Register temp_reg,
 148                                                    Register temp_reg2,
 149                                                    Label* L_success,
 150                                                    Label* L_failure,
 151                                                    Label* L_slow_path) {
 152 
 153   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 154   const Register super_check_offset = temp_reg2;
 155 
 156   Label L_fallthrough;
 157   int label_nulls = 0;
 158   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 159   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 160   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 161   assert(label_nulls <= 1, "at most one NULL in the batch");
 162 
 163   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 164   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 165   Address super_check_offset_addr(super_klass, sco_offset);
 166 
 167   // If the pointers are equal, we are done (e.g., String[] elements).
 168   // This self-check enables sharing of secondary supertype arrays among
 169   // non-primary types such as array-of-interface.  Otherwise, each such
 170   // type would need its own customized SSA.
 171   // We move this check to the front of the fast path because many
 172   // type checks are in fact trivially successful in this manner,
 173   // so we get a nicely predicted branch right at the start of the check.
 174   cmp(sub_klass, super_klass);
 175   b(*L_success, eq);
 176 
 177   // Check the supertype display:
 178   ldr_u32(super_check_offset, super_check_offset_addr);
 179 
 180   Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);
 183 
 184   // This check has worked decisively for primary supers.
 185   // Secondary supers are sought in the super_cache ('super_cache_addr').
 186   // (Secondary supers are interfaces and very deeply nested subtypes.)
 187   // This works in the same check above because of a tricky aliasing
 188   // between the super_cache and the primary super display elements.
 189   // (The 'super_check_addr' can address either, as the case requires.)
 190   // Note that the cache is updated below if it does not help us find
 191   // what we need immediately.
 192   // So if it was a primary super, we can just fail immediately.
 193   // Otherwise, it's the slow path for us (no success at this point).
 194 
 195   b(*L_success, eq);
 196   cmp_32(super_check_offset, sc_offset);
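  // The display slot did not match. If super_check_offset equals the
  // secondary_super_cache offset, the slot consulted was only the cache, so the
  // result is inconclusive and we must take the slow path; otherwise it was a
  // primary super slot and the check has definitely failed.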
 197   if (L_failure == &L_fallthrough) {
 198     b(*L_slow_path, eq);
 199   } else {
 200     b(*L_failure, ne);
 201     if (L_slow_path != &L_fallthrough) {
 202       b(*L_slow_path);
 203     }
 204   }
 205 
 206   bind(L_fallthrough);
 207 }
 208 
 209 
 210 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 211                                                    Register super_klass,
 212                                                    Register temp_reg,
 213                                                    Register temp2_reg,
 214                                                    Register temp3_reg,
 215                                                    Label* L_success,
 216                                                    Label* L_failure,
 217                                                    bool set_cond_codes) {
 218 #ifdef AARCH64
 219   NOT_IMPLEMENTED();
 220 #else
 221   // Note: if used by code that expects a register to be 0 on success,
 222   // this register must be temp_reg and set_cond_codes must be true
 223 
 224   Register saved_reg = noreg;
 225 
 226   // get additional tmp registers
 227   if (temp3_reg == noreg) {
 228     saved_reg = temp3_reg = LR;
 229     push(saved_reg);
 230   }
 231 
 232   assert(temp2_reg != noreg, "need all the temporary registers");
 233   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 234 
 235   Register cmp_temp = temp_reg;
 236   Register scan_temp = temp3_reg;
 237   Register count_temp = temp2_reg;
 238 
 239   Label L_fallthrough;
 240   int label_nulls = 0;
 241   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 242   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 243   assert(label_nulls <= 1, "at most one NULL in the batch");
 244 
 245   // a couple of useful fields in sub_klass:
 246   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 247   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 248   Address secondary_supers_addr(sub_klass, ss_offset);
 249   Address super_cache_addr(     sub_klass, sc_offset);
 250 
 251 #ifndef PRODUCT
 252   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 253 #endif
 254 
 255   // We will consult the secondary-super array.
 256   ldr(scan_temp, Address(sub_klass, ss_offset));
 257 
  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // Compressed oops are not supported here, so search_key is simply the
  // (uncompressed) super_klass pointer.
 260   Register search_key = super_klass;
 261 
 262   // Load the array length.
 263   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 264   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 265 
 266   add(count_temp, count_temp, 1);
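  // count_temp is pre-incremented so the loop below can decrement and test for
  // exhaustion with a single subs at the top of each iteration.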
 267 
 268   Label L_loop, L_setnz_and_fail, L_fail;
 269 
 270   // Top of search loop
 271   bind(L_loop);
 272   // Notes:
 273   //  scan_temp starts at the array elements
 274   //  count_temp is 1+size
 275   subs(count_temp, count_temp, 1);
 276   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 277     // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
 279   } else {
 280     b(L_fail, eq); // not found in the array
 281   }
 282 
 283   // Load next super to check
  // Elements of the secondary supers array are pointer sized.
 285   int element_size = wordSize;
 286   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 287 
 288   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 289   subs(cmp_temp, cmp_temp, search_key);
 290 
 291   // A miss means we are NOT a subtype and need to keep looping
 292   b(L_loop, ne);
 293 
 294   // Falling out the bottom means we found a hit; we ARE a subtype
 295 
 296   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 297 
 298   // Success.  Cache the super we found and proceed in triumph.
 299   str(super_klass, Address(sub_klass, sc_offset));
 300 
 301   if (saved_reg != noreg) {
 302     // Return success
 303     pop(saved_reg);
 304   }
 305 
 306   b(*L_success);
 307 
 308   bind(L_fail);
 309   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 310   if (set_cond_codes) {
 311     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 312   }
 313   if (saved_reg != noreg) {
 314     pop(saved_reg);
 315   }
 316   if (L_failure != &L_fallthrough) {
 317     b(*L_failure);
 318   }
 319 
 320   bind(L_fallthrough);
 321 #endif
 322 }
 323 
 324 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 325 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 326   assert_different_registers(params_base, params_count);
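  // Computes params_base + params_count * Interpreter::stackElementSize - Interpreter::stackElementSize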
 327   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 328   return Address(tmp, -Interpreter::stackElementSize);
 329 }
 330 
 331 
 332 void MacroAssembler::align(int modulus) {
 333   while (offset() % modulus != 0) {
 334     nop();
 335   }
 336 }
 337 
 338 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 339                                         Register last_java_fp,
 340                                         bool save_last_java_pc,
 341                                         Register tmp) {
 342   int pc_offset;
 343   if (last_java_fp != noreg) {
 344     // optional
 345     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 346     _fp_saved = true;
 347   } else {
 348     _fp_saved = false;
 349   }
 350   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 351 #ifdef AARCH64
 352     pc_offset = mov_pc_to(tmp);
 353     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 354 #else
 355     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 356     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 357 #endif
 358     _pc_saved = true;
 359   } else {
 360     _pc_saved = false;
 361     pc_offset = -1;
 362   }
  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that
  // other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but have not added a StoreStore
  // barrier.
 370 
 371   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 372   // to update oop_map offsets) and a StoreStore barrier might be needed.
 373 
 374   if (last_java_sp == noreg) {
 375     last_java_sp = SP; // always saved
 376   }
 377 #ifdef AARCH64
 378   if (last_java_sp == SP) {
 379     mov(tmp, SP);
 380     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 381   } else {
 382     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 383   }
 384 #else
 385   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 386 #endif
 387 
 388   return pc_offset; // for oopmaps
 389 }
 390 
 391 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 392   const Register Rzero = zero_register(tmp);
 393   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 394   if (_fp_saved) {
 395     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 396   }
 397   if (_pc_saved) {
 398     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 399   }
 400 }
 401 
 402 
 403 // Implementation of call_VM versions
 404 
 405 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 406   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 407   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 408 
 409 #ifndef AARCH64
 410   // Safer to save R9 here since callers may have been written
 411   // assuming R9 survives. This is suboptimal but is not worth
 412   // optimizing for the few platforms where R9 is scratched.
 413   push(RegisterSet(R4) | R9ifScratched);
 414   mov(R4, SP);
 415   bic(SP, SP, StackAlignmentInBytes - 1);
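  // R4 preserves the original SP so it can be restored after the call;
  // SP itself is rounded down to the required stack alignment.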
 416 #endif // AARCH64
 417   call(entry_point, relocInfo::runtime_call_type);
 418 #ifndef AARCH64
 419   mov(SP, R4);
 420   pop(RegisterSet(R4) | R9ifScratched);
 421 #endif // AARCH64
 422 }
 423 
 424 
 425 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 426   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 427   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 428 
 429   const Register tmp = Rtemp;
 430   assert_different_registers(oop_result, tmp);
 431 
 432   set_last_Java_frame(SP, FP, true, tmp);
 433 
 434 #ifdef ASSERT
 435   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 436 #endif // ASSERT
 437 
 438 #ifndef AARCH64
 439 #if R9_IS_SCRATCHED
 440   // Safer to save R9 here since callers may have been written
 441   // assuming R9 survives. This is suboptimal but is not worth
 442   // optimizing for the few platforms where R9 is scratched.
 443 
  // Note: we cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once SP is saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
 448   sub(SP, SP, 4);
 449   bic(SP, SP, StackAlignmentInBytes - 1);
 450   str(R9, Address(SP, 0));
 451 #else
 452   bic(SP, SP, StackAlignmentInBytes - 1);
 453 #endif // R9_IS_SCRATCHED
 454 #endif
 455 
 456   mov(R0, Rthread);
 457   call(entry_point, relocInfo::runtime_call_type);
 458 
 459 #ifndef AARCH64
 460 #if R9_IS_SCRATCHED
 461   ldr(R9, Address(SP, 0));
 462 #endif
 463   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 464 #endif
 465 
 466   reset_last_Java_frame(tmp);
 467 
 468   // C++ interp handles this in the interpreter
 469   check_and_handle_popframe();
 470   check_and_handle_earlyret();
 471 
 472   if (check_exceptions) {
 473     // check for pending exceptions
 474     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 475 #ifdef AARCH64
 476     Label L;
 477     cbz(tmp, L);
 478     mov_pc_to(Rexception_pc);
 479     b(StubRoutines::forward_exception_entry());
 480     bind(L);
 481 #else
 482     cmp(tmp, 0);
 483     mov(Rexception_pc, PC, ne);
 484     b(StubRoutines::forward_exception_entry(), ne);
 485 #endif // AARCH64
 486   }
 487 
 488   // get oop result if there is one and reset the value in the thread
 489   if (oop_result->is_valid()) {
 490     get_vm_result(oop_result, tmp);
 491   }
 492 }
 493 
 494 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 495   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 496 }
 497 
 498 
 499 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 500   assert (arg_1 == R1, "fixed register for arg_1");
 501   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 502 }
 503 
 504 
 505 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 506   assert (arg_1 == R1, "fixed register for arg_1");
 507   assert (arg_2 == R2, "fixed register for arg_2");
 508   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 509 }
 510 
 511 
 512 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 513   assert (arg_1 == R1, "fixed register for arg_1");
 514   assert (arg_2 == R2, "fixed register for arg_2");
 515   assert (arg_3 == R3, "fixed register for arg_3");
 516   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 517 }
 518 
 519 
 520 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 521   // Not used on ARM
 522   Unimplemented();
 523 }
 524 
 525 
 526 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 527   // Not used on ARM
 528   Unimplemented();
 529 }
 530 
 531 
 532 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 533 // Not used on ARM
 534   Unimplemented();
 535 }
 536 
 537 
 538 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 539   // Not used on ARM
 540   Unimplemented();
 541 }
 542 
 543 // Raw call, without saving/restoring registers, exception handling, etc.
 544 // Mainly used from various stubs.
 545 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 546   const Register tmp = Rtemp; // Rtemp free since scratched by call
 547   set_last_Java_frame(SP, FP, true, tmp);
 548 #if R9_IS_SCRATCHED
 549   if (save_R9_if_scratched) {
 550     // Note: Saving also R10 for alignment.
 551     push(RegisterSet(R9, R10));
 552   }
 553 #endif
 554   mov(R0, Rthread);
 555   call(entry_point, relocInfo::runtime_call_type);
 556 #if R9_IS_SCRATCHED
 557   if (save_R9_if_scratched) {
 558     pop(RegisterSet(R9, R10));
 559   }
 560 #endif
 561   reset_last_Java_frame(tmp);
 562 }
 563 
 564 void MacroAssembler::call_VM_leaf(address entry_point) {
 565   call_VM_leaf_helper(entry_point, 0);
 566 }
 567 
 568 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 569   assert (arg_1 == R0, "fixed register for arg_1");
 570   call_VM_leaf_helper(entry_point, 1);
 571 }
 572 
 573 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 574   assert (arg_1 == R0, "fixed register for arg_1");
 575   assert (arg_2 == R1, "fixed register for arg_2");
 576   call_VM_leaf_helper(entry_point, 2);
 577 }
 578 
 579 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 580   assert (arg_1 == R0, "fixed register for arg_1");
 581   assert (arg_2 == R1, "fixed register for arg_2");
 582   assert (arg_3 == R2, "fixed register for arg_3");
 583   call_VM_leaf_helper(entry_point, 3);
 584 }
 585 
 586 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 587   assert (arg_1 == R0, "fixed register for arg_1");
 588   assert (arg_2 == R1, "fixed register for arg_2");
 589   assert (arg_3 == R2, "fixed register for arg_3");
 590   assert (arg_4 == R3, "fixed register for arg_4");
 591   call_VM_leaf_helper(entry_point, 4);
 592 }
 593 
 594 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 595   assert_different_registers(oop_result, tmp);
 596   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 597   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 598   verify_oop(oop_result);
 599 }
 600 
 601 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 602   assert_different_registers(metadata_result, tmp);
 603   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 604   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 605 }
 606 
 607 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 608   if (arg2.is_register()) {
 609     add(dst, arg1, arg2.as_register());
 610   } else {
 611     add(dst, arg1, arg2.as_constant());
 612   }
 613 }
 614 
 615 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 616 #ifdef AARCH64
 617   if (c == 0) {
 618     if (rd != rn) {
 619       mov(rd, rn);
 620     }
 621     return;
 622   }
 623   if (c < 0) {
 624     sub_slow(rd, rn, -c);
 625     return;
 626   }
 627   if (c > right_n_bits(24)) {
 628     guarantee(rd != rn, "no large add_slow with only one register");
 629     mov_slow(rd, c);
 630     add(rd, rn, rd);
 631   } else {
 632     int lo = c & right_n_bits(12);
 633     int hi = (c >> 12) & right_n_bits(12);
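    // AArch64 ADD (immediate) accepts a 12-bit value optionally shifted left by 12,
    // so the constant is emitted as at most two adds.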
 634     if (lo != 0) {
 635       add(rd, rn, lo, lsl0);
 636     }
 637     if (hi != 0) {
 638       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 639     }
 640   }
 641 #else
 642   // This function is used in compiler for handling large frame offsets
 643   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 644     return sub(rd, rn, (-c));
 645   }
 646   int low = c & 0x3fc;
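  // low is an 8-bit value shifted left by 2, hence always encodable as a rotated immediate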
 647   if (low != 0) {
 648     add(rd, rn, low);
 649     rn = rd;
 650   }
 651   if (c & ~0x3fc) {
 652     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 653     add(rd, rn, c & ~0x3fc);
 654   } else if (rd != rn) {
 655     assert(c == 0, "");
 656     mov(rd, rn); // need to generate at least one move!
 657   }
 658 #endif // AARCH64
 659 }
 660 
 661 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 662 #ifdef AARCH64
 663   if (c <= 0) {
 664     add_slow(rd, rn, -c);
 665     return;
 666   }
 667   if (c > right_n_bits(24)) {
 668     guarantee(rd != rn, "no large sub_slow with only one register");
 669     mov_slow(rd, c);
 670     sub(rd, rn, rd);
 671   } else {
 672     int lo = c & right_n_bits(12);
 673     int hi = (c >> 12) & right_n_bits(12);
 674     if (lo != 0) {
 675       sub(rd, rn, lo, lsl0);
 676     }
 677     if (hi != 0) {
 678       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 679     }
 680   }
 681 #else
 682   // This function is used in compiler for handling large frame offsets
 683   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 684     return add(rd, rn, (-c));
 685   }
 686   int low = c & 0x3fc;
 687   if (low != 0) {
 688     sub(rd, rn, low);
 689     rn = rd;
 690   }
 691   if (c & ~0x3fc) {
 692     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 693     sub(rd, rn, c & ~0x3fc);
 694   } else if (rd != rn) {
 695     assert(c == 0, "");
 696     mov(rd, rn); // need to generate at least one move!
 697   }
 698 #endif // AARCH64
 699 }
 700 
 701 void MacroAssembler::mov_slow(Register rd, address addr) {
 702   // do *not* call the non relocated mov_related_address
 703   mov_slow(rd, (intptr_t)addr);
 704 }
 705 
 706 void MacroAssembler::mov_slow(Register rd, const char *str) {
 707   mov_slow(rd, (intptr_t)str);
 708 }
 709 
 710 #ifdef AARCH64
 711 
 712 // Common code for mov_slow and instr_count_for_mov_slow.
 713 // Returns number of instructions of mov_slow pattern,
 714 // generating it if non-null MacroAssembler is given.
 715 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when making modifications.
 718 
 719   const intx mask = right_n_bits(16);
 720   // 1 movz instruction
 721   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 722     if ((c & ~(mask << base_shift)) == 0) {
 723       if (masm != NULL) {
 724         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 725       }
 726       return 1;
 727     }
 728   }
 729   // 1 movn instruction
 730   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 731     if (((~c) & ~(mask << base_shift)) == 0) {
 732       if (masm != NULL) {
 733         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 734       }
 735       return 1;
 736     }
 737   }
 738   // 1 orr instruction
 739   {
 740     LogicalImmediate imm(c, false);
 741     if (imm.is_encoded()) {
 742       if (masm != NULL) {
 743         masm->orr(rd, ZR, imm);
 744       }
 745       return 1;
 746     }
 747   }
 748   // 1 movz/movn + up to 3 movk instructions
 749   int zeroes = 0;
 750   int ones = 0;
 751   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 752     int part = (c >> base_shift) & mask;
 753     if (part == 0) {
 754       ++zeroes;
 755     } else if (part == mask) {
 756       ++ones;
 757     }
 758   }
 759   int def_bits = 0;
 760   if (ones > zeroes) {
 761     def_bits = mask;
 762   }
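  // def_bits selects the background pattern: start from movn when most halfwords
  // are all-ones, otherwise from movz, and patch the remaining halfwords with movk.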
 763   int inst_count = 0;
 764   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 765     int part = (c >> base_shift) & mask;
 766     if (part != def_bits) {
 767       if (masm != NULL) {
 768         if (inst_count > 0) {
 769           masm->movk(rd, part, base_shift);
 770         } else {
 771           if (def_bits == 0) {
 772             masm->movz(rd, part, base_shift);
 773           } else {
 774             masm->movn(rd, ~part & mask, base_shift);
 775           }
 776         }
 777       }
 778       inst_count++;
 779     }
 780   }
 781   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 782   return inst_count;
 783 }
 784 
 785 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 786 #ifdef ASSERT
 787   int off = offset();
 788 #endif
 789   (void) mov_slow_helper(rd, c, this);
 790   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 791 }
 792 
 793 // Counts instructions generated by mov_slow(rd, c).
 794 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 795   return mov_slow_helper(noreg, c, NULL);
 796 }
 797 
 798 int MacroAssembler::instr_count_for_mov_slow(address c) {
 799   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 800 }
 801 
 802 #else
 803 
 804 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 805   if (AsmOperand::is_rotated_imm(c)) {
 806     mov(rd, c, cond);
 807   } else if (AsmOperand::is_rotated_imm(~c)) {
 808     mvn(rd, ~c, cond);
 809   } else if (VM_Version::supports_movw()) {
 810     movw(rd, c & 0xffff, cond);
 811     if ((unsigned int)c >> 16) {
 812       movt(rd, (unsigned int)c >> 16, cond);
 813     }
 814   } else {
 815     // Find first non-zero bit
 816     int shift = 0;
 817     while ((c & (3 << shift)) == 0) {
 818       shift += 2;
 819     }
 820     // Put the least significant part of the constant
 821     int mask = 0xff << shift;
 822     mov(rd, c & mask, cond);
 823     // Add up to 3 other parts of the constant;
 824     // each of them can be represented as rotated_imm
 825     if (c & (mask << 8)) {
 826       orr(rd, rd, c & (mask << 8), cond);
 827     }
 828     if (c & (mask << 16)) {
 829       orr(rd, rd, c & (mask << 16), cond);
 830     }
 831     if (c & (mask << 24)) {
 832       orr(rd, rd, c & (mask << 24), cond);
 833     }
 834   }
 835 }
 836 
 837 #endif // AARCH64
 838 
 839 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 840 #ifdef AARCH64
 841                              bool patchable
 842 #else
 843                              AsmCondition cond
 844 #endif
 845                              ) {
 846 
 847   if (o == NULL) {
 848 #ifdef AARCH64
 849     if (patchable) {
 850       nop();
 851     }
 852     mov(rd, ZR);
 853 #else
 854     mov(rd, 0, cond);
 855 #endif
 856     return;
 857   }
 858 
 859   if (oop_index == 0) {
 860     oop_index = oop_recorder()->allocate_oop_index(o);
 861   }
 862   relocate(oop_Relocation::spec(oop_index));
 863 
 864 #ifdef AARCH64
 865   if (patchable) {
 866     nop();
 867   }
 868   ldr(rd, pc());
 869 #else
 870   if (VM_Version::supports_movw()) {
 871     movw(rd, 0, cond);
 872     movt(rd, 0, cond);
 873   } else {
 874     ldr(rd, Address(PC), cond);
 875     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 876     nop();
 877   }
 878 #endif
 879 }
 880 
 881 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 882   if (o == NULL) {
 883 #ifdef AARCH64
 884     if (patchable) {
 885       nop();
 886     }
 887 #endif
 888     mov(rd, 0);
 889     return;
 890   }
 891 
 892   if (metadata_index == 0) {
 893     metadata_index = oop_recorder()->allocate_metadata_index(o);
 894   }
 895   relocate(metadata_Relocation::spec(metadata_index));
 896 
 897 #ifdef AARCH64
 898   if (patchable) {
 899     nop();
 900   }
 901 #ifdef COMPILER2
 902   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 903     mov_slow(rd, (address)o);
 904     return;
 905   }
 906 #endif
 907   ldr(rd, pc());
 908 #else
 909   if (VM_Version::supports_movw()) {
 910     movw(rd, ((int)o) & 0xffff);
 911     movt(rd, (unsigned int)o >> 16);
 912   } else {
 913     ldr(rd, Address(PC));
 914     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 915     nop();
 916   }
 917 #endif // AARCH64
 918 }
 919 
 920 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 921   Label skip_constant;
 922   union {
 923     jfloat f;
 924     jint i;
 925   } accessor;
 926   accessor.f = c;
 927 
 928 #ifdef AARCH64
 929   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 930   Label L;
 931   ldr_s(fd, target(L));
 932   b(skip_constant);
 933   bind(L);
 934   emit_int32(accessor.i);
 935   bind(skip_constant);
 936 #else
 937   flds(fd, Address(PC), cond);
 938   b(skip_constant);
 939   emit_int32(accessor.i);
 940   bind(skip_constant);
 941 #endif // AARCH64
 942 }
 943 
 944 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 945   Label skip_constant;
 946   union {
 947     jdouble d;
 948     jint i[2];
 949   } accessor;
 950   accessor.d = c;
 951 
 952 #ifdef AARCH64
 953   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 954   Label L;
 955   ldr_d(fd, target(L));
 956   b(skip_constant);
 957   align(wordSize);
 958   bind(L);
 959   emit_int32(accessor.i[0]);
 960   emit_int32(accessor.i[1]);
 961   bind(skip_constant);
 962 #else
 963   fldd(fd, Address(PC), cond);
 964   b(skip_constant);
 965   emit_int32(accessor.i[0]);
 966   emit_int32(accessor.i[1]);
 967   bind(skip_constant);
 968 #endif // AARCH64
 969 }
 970 
 971 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 972   intptr_t addr = (intptr_t) address_of_global;
 973 #ifdef AARCH64
 974   assert((addr & 0x3) == 0, "address should be aligned");
 975 
 976   // FIXME: TODO
 977   if (false && page_reachable_from_cache(address_of_global)) {
 978     assert(false,"TODO: relocate");
 979     //relocate();
 980     adrp(reg, address_of_global);
 981     ldrsw(reg, Address(reg, addr & 0xfff));
 982   } else {
 983     mov_slow(reg, addr & ~0x3fff);
 984     ldrsw(reg, Address(reg, addr & 0x3fff));
 985   }
 986 #else
 987   mov_slow(reg, addr & ~0xfff);
 988   ldr(reg, Address(reg, addr & 0xfff));
 989 #endif
 990 }
 991 
 992 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 993 #ifdef AARCH64
 994   intptr_t addr = (intptr_t) address_of_global;
 995   assert ((addr & 0x7) == 0, "address should be aligned");
 996   mov_slow(reg, addr & ~0x7fff);
 997   ldr(reg, Address(reg, addr & 0x7fff));
 998 #else
 999   ldr_global_s32(reg, address_of_global);
1000 #endif
1001 }
1002 
1003 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
1004   intptr_t addr = (intptr_t) address_of_global;
1005   mov_slow(reg, addr & ~0xfff);
1006   ldrb(reg, Address(reg, addr & 0xfff));
1007 }
1008 
1009 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1010 #ifdef AARCH64
1011   switch (bits) {
1012     case  8: uxtb(rd, rn); break;
1013     case 16: uxth(rd, rn); break;
1014     case 32: mov_w(rd, rn); break;
1015     default: ShouldNotReachHere();
1016   }
1017 #else
1018   if (bits <= 8) {
1019     andr(rd, rn, (1 << bits) - 1);
1020   } else if (bits >= 24) {
1021     bic(rd, rn, -1 << bits);
1022   } else {
1023     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1024     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1025   }
1026 #endif
1027 }
1028 
1029 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1030 #ifdef AARCH64
1031   switch (bits) {
1032     case  8: sxtb(rd, rn); break;
1033     case 16: sxth(rd, rn); break;
1034     case 32: sxtw(rd, rn); break;
1035     default: ShouldNotReachHere();
1036   }
1037 #else
1038   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1039   mov(rd, AsmOperand(rd, asr, 32 - bits));
1040 #endif
1041 }
1042 
1043 #ifndef AARCH64
1044 
1045 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1046                                Register rn_lo, Register rn_hi,
1047                                AsmCondition cond) {
1048   if (rd_lo != rn_hi) {
1049     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1050     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1051   } else if (rd_hi != rn_lo) {
1052     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1053     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1054   } else {
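    // Both destination registers alias the sources crosswise: swap the two
    // halves with the three-eor trick, which needs no scratch register.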
1055     eor(rd_lo, rd_hi, rd_lo, cond);
1056     eor(rd_hi, rd_lo, rd_hi, cond);
1057     eor(rd_lo, rd_hi, rd_lo, cond);
1058   }
1059 }
1060 
1061 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1062                                 Register rn_lo, Register rn_hi,
1063                                 AsmShift shift, Register count) {
1064   Register tmp;
1065   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1066     tmp = rd_lo;
1067   } else {
1068     tmp = rd_hi;
1069   }
1070   assert_different_registers(tmp, count, rn_lo, rn_hi);
1071 
1072   subs(tmp, count, 32);
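  // tmp = count - 32; the pl/mi conditions below distinguish shifts by 32 or more
  // (pl) from shifts by less than 32 (mi).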
1073   if (shift == lsl) {
1074     assert_different_registers(rd_hi, rn_lo);
1075     assert_different_registers(count, rd_hi);
1076     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1077     rsb(tmp, count, 32, mi);
1078     if (rd_hi == rn_hi) {
1079       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1080       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1081     } else {
1082       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1083       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1084     }
1085     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1086   } else {
1087     assert_different_registers(rd_lo, rn_hi);
1088     assert_different_registers(rd_lo, count);
1089     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1090     rsb(tmp, count, 32, mi);
1091     if (rd_lo == rn_lo) {
1092       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1093       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1094     } else {
1095       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1096       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1097     }
1098     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1099   }
1100 }
1101 
1102 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1103                                 Register rn_lo, Register rn_hi,
1104                                 AsmShift shift, int count) {
1105   assert(count != 0 && (count & ~63) == 0, "must be");
1106 
1107   if (shift == lsl) {
1108     assert_different_registers(rd_hi, rn_lo);
1109     if (count >= 32) {
1110       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1111       mov(rd_lo, 0);
1112     } else {
1113       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1114       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1115       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1116     }
1117   } else {
1118     assert_different_registers(rd_lo, rn_hi);
1119     if (count >= 32) {
1120       if (count == 32) {
1121         mov(rd_lo, rn_hi);
1122       } else {
1123         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1124       }
1125       if (shift == asr) {
1126         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1127       } else {
1128         mov(rd_hi, 0);
1129       }
1130     } else {
1131       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1132       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1133       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1134     }
1135   }
1136 }
1137 #endif // !AARCH64
1138 
1139 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when making modifications.
1142   if (!VerifyOops) return;
1143 
1144   char buffer[64];
1145 #ifdef COMPILER1
1146   if (CommentedAssembly) {
1147     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1148     block_comment(buffer);
1149   }
1150 #endif
1151   const char* msg_buffer = NULL;
1152   {
1153     ResourceMark rm;
1154     stringStream ss;
1155     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1156     msg_buffer = code_string(ss.as_string());
1157   }
1158 
1159   save_all_registers();
1160 
1161   if (reg != R2) {
1162       mov(R2, reg);                              // oop to verify
1163   }
1164   mov(R1, SP);                                   // register save area
1165 
1166   Label done;
1167   InlinedString Lmsg(msg_buffer);
1168   ldr_literal(R0, Lmsg);                         // message
1169 
1170   // call indirectly to solve generation ordering problem
1171   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1172   call(Rtemp);
1173 
1174   restore_all_registers();
1175 
1176   b(done);
1177 #ifdef COMPILER2
1178   int off = offset();
1179 #endif
1180   bind_literal(Lmsg);
1181 #ifdef COMPILER2
1182   if (offset() - off == 1 * wordSize) {
1183     // no padding, so insert nop for worst-case sizing
1184     nop();
1185   }
1186 #endif
1187   bind(done);
1188 }
1189 
1190 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1191   if (!VerifyOops) return;
1192 
1193   const char* msg_buffer = NULL;
1194   {
1195     ResourceMark rm;
1196     stringStream ss;
1197     if ((addr.base() == SP) && (addr.index()==noreg)) {
1198       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1199     } else {
1200       ss.print("verify_oop_addr: %s", s);
1201     }
1202     ss.print(" (%s:%d)", file, line);
1203     msg_buffer = code_string(ss.as_string());
1204   }
1205 
1206   int push_size = save_all_registers();
1207 
1208   if (addr.base() == SP) {
1209     // computes an addr that takes into account the push
1210     if (addr.index() != noreg) {
1211       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1212       add(new_base, SP, push_size);
1213       addr = addr.rebase(new_base);
1214     } else {
1215       addr = addr.plus_disp(push_size);
1216     }
1217   }
1218 
1219   ldr(R2, addr);                                 // oop to verify
1220   mov(R1, SP);                                   // register save area
1221 
1222   Label done;
1223   InlinedString Lmsg(msg_buffer);
1224   ldr_literal(R0, Lmsg);                         // message
1225 
1226   // call indirectly to solve generation ordering problem
1227   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1228   call(Rtemp);
1229 
1230   restore_all_registers();
1231 
1232   b(done);
1233   bind_literal(Lmsg);
1234   bind(done);
1235 }
1236 
1237 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1238   if (needs_explicit_null_check(offset)) {
1239 #ifdef AARCH64
1240     ldr(ZR, Address(reg));
1241 #else
1242     assert_different_registers(reg, tmp);
1243     if (tmp == noreg) {
1244       tmp = Rtemp;
1245       assert((! Thread::current()->is_Compiler_thread()) ||
1246              (! (ciEnv::current()->task() == NULL)) ||
1247              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1248              "Rtemp not available in C2"); // explicit tmp register required
1249       // XXX: could we mark the code buffer as not compatible with C2 ?
1250     }
1251     ldr(tmp, Address(reg));
1252 #endif
1253   }
1254 }
1255 
1256 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1257 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1258                                  RegisterOrConstant size_expression, Label& slow_case) {
1259   if (!Universe::heap()->supports_inline_contig_alloc()) {
1260     b(slow_case);
1261     return;
1262   }
1263 
1264   CollectedHeap* ch = Universe::heap();
1265 
1266   const Register top_addr = tmp1;
1267   const Register heap_end = tmp2;
1268 
1269   if (size_expression.is_register()) {
1270     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1271   } else {
1272     assert_different_registers(obj, obj_end, top_addr, heap_end);
1273   }
1274 
1275   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1276   if (load_const) {
1277     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1278   } else {
1279     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1280   }
1281   // Calculate new heap_top by adding the size of the object
1282   Label retry;
1283   bind(retry);
1284 
1285 #ifdef AARCH64
1286   ldxr(obj, top_addr);
1287 #else
1288   ldr(obj, Address(top_addr));
1289 #endif // AARCH64
1290 
1291   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1292   add_rc(obj_end, obj, size_expression);
1293   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1294   cmp(obj_end, obj);
1295   b(slow_case, lo);
1296   // Update heap_top if allocation succeeded
1297   cmp(obj_end, heap_end);
1298   b(slow_case, hi);
1299 
1300 #ifdef AARCH64
1301   stxr(heap_end/*scratched*/, obj_end, top_addr);
1302   cbnz_w(heap_end, retry);
1303 #else
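  // Attempt to install obj_end as the new heap top; if another thread won the
  // race, reload the top and retry.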
1304   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1305   b(retry, ne);
1306 #endif // AARCH64
1307 }
1308 
1309 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1310 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1311                                  RegisterOrConstant size_expression, Label& slow_case) {
1312   const Register tlab_end = tmp1;
1313   assert_different_registers(obj, obj_end, tlab_end);
1314 
1315   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1316   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1317   add_rc(obj_end, obj, size_expression);
1318   cmp(obj_end, tlab_end);
1319   b(slow_case, hi);
1320   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1321 }
1322 
1323 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
1324 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1325   Label loop;
1326   const Register ptr = start;
1327 
1328 #ifdef AARCH64
1329   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1330   const Register size = tmp;
1331   Label remaining, done;
1332 
1333   sub(size, end, start);
1334 
1335 #ifdef ASSERT
1336   { Label L;
1337     tst(size, wordSize - 1);
1338     b(L, eq);
1339     stop("size is not a multiple of wordSize");
1340     bind(L);
1341   }
1342 #endif // ASSERT
1343 
1344   subs(size, size, wordSize);
1345   b(remaining, le);
1346 
1347   // Zero by 2 words per iteration.
1348   bind(loop);
1349   subs(size, size, 2*wordSize);
1350   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1351   b(loop, gt);
1352 
1353   bind(remaining);
1354   b(done, ne);
1355   str(ZR, Address(ptr));
1356   bind(done);
1357 #else
1358   mov(tmp, 0);
1359   bind(loop);
1360   cmp(ptr, end);
1361   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1362   b(loop, lo);
1363 #endif // AARCH64
1364 }
1365 
1366 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1367 #ifdef AARCH64
1368   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1369   add_rc(tmp, tmp, size_in_bytes);
1370   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1371 #else
1372   // Bump total bytes allocated by this thread
1373   Label done;
1374 
1375   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1376   adds(tmp, tmp, size_in_bytes);
1377   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1378   b(done, cc);
1379 
  // Increment the high word and store the pair single-copy atomically (an unlikely
  // scenario on typical embedded systems, as it means more than 4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers.
  // Such a request could be difficult to satisfy by allocating those registers at a higher
  // level, therefore the routine is ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
1385 
1386   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1387     low = R2;
1388     high  = R3;
1389   } else {
1390     low = R0;
1391     high  = R1;
1392   }
1393   push(RegisterSet(low, high));
1394 
1395   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1396   adds(low, low, size_in_bytes);
1397   adc(high, high, 0);
1398   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1399 
1400   pop(RegisterSet(low, high));
1401 
1402   bind(done);
1403 #endif // AARCH64
1404 }
1405 
1406 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1407   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1408   if (UseStackBanging) {
1409     const int page_size = os::vm_page_size();
1410 
1411     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1412     strb(R0, Address(tmp));
1413 #ifdef AARCH64
1414     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1415       sub(tmp, tmp, page_size);
1416       strb(R0, Address(tmp));
1417     }
1418 #else
1419     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1420       strb(R0, Address(tmp, -0xff0, pre_indexed));
1421     }
1422 #endif // AARCH64
1423   }
1424 }
1425 
1426 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1427   if (UseStackBanging) {
1428     Label loop;
1429 
1430     mov(tmp, SP);
1431     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1432 #ifdef AARCH64
1433     sub(tmp, tmp, Rsize);
1434     bind(loop);
1435     subs(Rsize, Rsize, os::vm_page_size());
1436     strb(ZR, Address(tmp, Rsize));
1437 #else
1438     bind(loop);
1439     subs(Rsize, Rsize, 0xff0);
1440     strb(R0, Address(tmp, -0xff0, pre_indexed));
1441 #endif // AARCH64
1442     b(loop, hi);
1443   }
1444 }
1445 
1446 void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when making modifications.
1449 #ifdef COMPILER1
1450   if (CommentedAssembly) {
1451     block_comment("stop");
1452   }
1453 #endif
1454 
1455   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1456   InlinedString Lmsg(msg);
1457 
1458   // save all registers for further inspection
1459   save_all_registers();
1460 
1461   ldr_literal(R0, Lmsg);                     // message
1462   mov(R1, SP);                               // register save area
1463 
1464 #ifdef AARCH64
1465   ldr_literal(Rtemp, Ldebug);
1466   br(Rtemp);
1467 #else
1468   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1469 #endif // AARCH64
1470 
1471 #if defined(COMPILER2) && defined(AARCH64)
1472   int off = offset();
1473 #endif
1474   bind_literal(Lmsg);
1475   bind_literal(Ldebug);
1476 #if defined(COMPILER2) && defined(AARCH64)
1477   if (offset() - off == 2 * wordSize) {
1478     // no padding, so insert nop for worst-case sizing
1479     nop();
1480   }
1481 #endif
1482 }
1483 
1484 void MacroAssembler::warn(const char* msg) {
1485 #ifdef COMPILER1
1486   if (CommentedAssembly) {
1487     block_comment("warn");
1488   }
1489 #endif
1490 
1491   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1492   InlinedString Lmsg(msg);
1493   Label done;
1494 
1495   int push_size = save_caller_save_registers();
1496 
1497 #ifdef AARCH64
1498   // TODO-AARCH64 - get rid of extra debug parameters
1499   mov(R1, LR);
1500   mov(R2, FP);
1501   add(R3, SP, push_size);
1502 #endif
1503 
1504   ldr_literal(R0, Lmsg);                    // message
1505   ldr_literal(LR, Lwarn);                   // call warning
1506 
1507   call(LR);
1508 
1509   restore_caller_save_registers();
1510 
1511   b(done);
1512   bind_literal(Lmsg);
1513   bind_literal(Lwarn);
1514   bind(done);
1515 }
1516 
1517 
1518 int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when making modifications.
1521 #ifdef AARCH64
1522   const Register tmp = Rtemp;
1523   raw_push(R30, ZR);
1524   for (int i = 28; i >= 0; i -= 2) {
1525       raw_push(as_Register(i), as_Register(i+1));
1526   }
1527   mov_pc_to(tmp);
1528   str(tmp, Address(SP, 31*wordSize));
1529   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1530   return 32*wordSize;
1531 #else
1532   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1533   return 15*wordSize;
1534 #endif // AARCH64
1535 }
1536 
1537 void MacroAssembler::restore_all_registers() {
1538 #ifdef AARCH64
1539   for (int i = 0; i <= 28; i += 2) {
1540     raw_pop(as_Register(i), as_Register(i+1));
1541   }
1542   raw_pop(R30, ZR);
1543 #else
1544   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1545   add(SP, SP, wordSize);                         // discard saved PC
1546 #endif // AARCH64
1547 }
1548 
1549 int MacroAssembler::save_caller_save_registers() {
1550 #ifdef AARCH64
1551   for (int i = 0; i <= 16; i += 2) {
1552     raw_push(as_Register(i), as_Register(i+1));
1553   }
1554   raw_push(R18, LR);
1555   return 20*wordSize;
1556 #else
1557 #if R9_IS_SCRATCHED
1558   // Save also R10 to preserve alignment
1559   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1560   return 8*wordSize;
1561 #else
1562   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1563   return 6*wordSize;
1564 #endif
1565 #endif // AARCH64
1566 }
1567 
1568 void MacroAssembler::restore_caller_save_registers() {
1569 #ifdef AARCH64
1570   raw_pop(R18, LR);
1571   for (int i = 16; i >= 0; i -= 2) {
1572     raw_pop(as_Register(i), as_Register(i+1));
1573   }
1574 #else
1575 #if R9_IS_SCRATCHED
1576   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1577 #else
1578   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1579 #endif
1580 #endif // AARCH64
1581 }
1582 
1583 void MacroAssembler::debug(const char* msg, const intx* registers) {
1584   // In order to get locks to work, we need to fake a in_VM state
1585   JavaThread* thread = JavaThread::current();
1586   thread->set_thread_state(_thread_in_vm);
1587 
1588   if (ShowMessageBoxOnError) {
1589     ttyLocker ttyl;
1590     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1591       BytecodeCounter::print();
1592     }
1593     if (os::message_box(msg, "Execution stopped, print registers?")) {
1594 #ifdef AARCH64
1595       // saved registers: R0-R30, PC
1596       const int nregs = 32;
1597 #else
1598       // saved registers: R0-R12, LR, PC
1599       const int nregs = 15;
1600       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1601 #endif // AARCH64
1602 
1603       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1604         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1605       }
1606 
1607 #ifdef AARCH64
1608       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1609 #endif // AARCH64
1610 
1611       // derive original SP value from the address of register save area
1612       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1613     }
1614     BREAKPOINT;
1615   } else {
1616     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1617   }
1618   assert(false, "DEBUG MESSAGE: %s", msg);
1619   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1620 }
1621 
1622 void MacroAssembler::unimplemented(const char* what) {
1623   const char* buf = NULL;
1624   {
1625     ResourceMark rm;
1626     stringStream ss;
1627     ss.print("unimplemented: %s", what);
1628     buf = code_string(ss.as_string());
1629   }
1630   stop(buf);
1631 }
1632 
1633 
1634 // Implementation of FixedSizeCodeBlock
1635 
1636 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1637 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1638 }
1639 
1640 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1641   if (_enabled) {
1642     address curr_pc = _masm->pc();
1643 
1644     assert(_start < curr_pc, "invalid current pc");
1645     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1646 
1647     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1648     for (int i = 0; i < nops_count; i++) {
1649       _masm->nop();
1650     }
1651   }
1652 }
1653 
1654 #ifdef AARCH64
1655 
1656 // Serializes memory.
1657 // The tmp register is not used on AArch64; the parameter is provided solely for compatibility with 32-bit ARM
1658 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1659   if (!os::is_MP()) return;
1660 
1661   // TODO-AARCH64 investigate dsb vs dmb effects
1662   if (order_constraint == StoreStore) {
1663     dmb(DMB_st);
1664   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1665     dmb(DMB_ld);
1666   } else {
1667     dmb(DMB_all);
1668   }
1669 }
1670 
1671 #else
1672 
1673 // Serializes memory. Potentially blows flags and the tmp register.
1674 // tmp is a scratch register for the ARMv6 coprocessor write (may be noreg on other architecture versions)
1675 // preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve the status flags. Optional.
1676 // load_tgt is an ordered-load target register, used only in the LoadStore case to create a dependency between the load and the conditional branch. Optional.
1677 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1678                             Register tmp,
1679                             bool preserve_flags,
1680                             Register load_tgt) {
1681   if (!os::is_MP()) return;
1682 
1683   if (order_constraint == StoreStore) {
1684     dmb(DMB_st, tmp);
1685   } else if ((order_constraint & StoreLoad)  ||
1686              (order_constraint & LoadLoad)   ||
1687              (order_constraint & StoreStore) ||
1688              (load_tgt == noreg)             ||
1689              preserve_flags) {
1690     dmb(DMB_all, tmp);
1691   } else {
1692     // LoadStore: speculative store reordering is prohibited
1693 
1694     // By providing an ordered load target register, we avoid an extra memory load reference
1695     Label not_taken;
1696     bind(not_taken);
1697     cmp(load_tgt, load_tgt);
1698     b(not_taken, ne);
1699   }
1700 }
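
// Summary of the 32-bit mapping above (illustrative sketch only; no extra code is emitted here):
//   StoreStore only                                   -> dmb st
//   any StoreLoad/LoadLoad/StoreStore combination,
//   or no load_tgt, or preserve_flags                 -> dmb sy (full barrier)
//   pure LoadStore with a load_tgt register           -> cmp load_tgt, load_tgt; b(.., ne)  (never taken)
// The last form relies on the control-dependency rule: a conditional branch that depends
// on the loaded value keeps later stores from being observed ahead of that load.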
1701 
1702 #endif // AARCH64
1703 
1704 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1705 // on failure, so fall-through can only mean success.
1706 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1707 // This is only needed for C2, which for some reason does not retry,
1708 // while C1/interpreter does.
1709 // TODO: measure if it makes a difference
1710 
1711 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1712   Register base, Register tmp, Label &slow_case,
1713   bool allow_fallthrough_on_failure, bool one_shot)
1714 {
1715 
1716   bool fallthrough_is_success = false;
1717 
1718   // ARM Litmus Test example does prefetching here.
1719   // TODO: investigate if it helps performance
1720 
1721   // The last store was to the displaced header, so to prevent
1722   // reordering we must issue a StoreStore or Release barrier before
1723   // the CAS store.
1724 
1725 #ifdef AARCH64
1726 
1727   Register Rscratch = tmp;
1728   Register Roop = base;
1729   Register mark = oldval;
1730   Register Rbox = newval;
1731   Label loop;
1732 
1733   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1734 
1735   // Instead of StoreStore here, we use store-release-exclusive below
1736 
1737   bind(loop);
1738 
1739   ldaxr(tmp, base);  // acquire
1740   cmp(tmp, oldval);
1741   b(slow_case, ne);
1742   stlxr(tmp, newval, base); // release
1743   if (one_shot) {
1744     cmp_w(tmp, 0);
1745   } else {
1746     cbnz_w(tmp, loop);
1747     fallthrough_is_success = true;
1748   }
1749 
1750   // MemBarAcquireLock would normally go here, but
1751   // the ldaxr+stlxr pair above already provides
1752   // sequentially consistent ordering
1753 
1754 #else
1755   membar(MacroAssembler::StoreStore, noreg);
1756 
1757   if (one_shot) {
1758     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1759     cmp(tmp, oldval);
1760     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1761     cmp(tmp, 0, eq);
1762   } else {
1763     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1764   }
1765 
1766   // MemBarAcquireLock barrier
1767   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1768   // but that doesn't prevent a load or store from floating up between
1769   // the load and store in the CAS sequence, so play it safe and
1770   // do a full fence.
1771   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1772 #endif
1773   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1774     b(slow_case, ne);
1775   }
1776 }
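
// For reference, the one_shot 32-bit path above emits roughly the following sequence
// (a sketch; the exact code comes from the ldrex/strex/cmp calls in this function):
//   dmb     st                               ; order the earlier displaced-header store
//   ldrex   tmp, [base, #mark_offset]
//   cmp     tmp, oldval
//   strexeq tmp, newval, [base, #mark_offset]
//   cmpeq   tmp, #0                          ; eq <=> store-exclusive succeeded
//   dmb     sy                               ; MemBarAcquireLock, full fence as noted above
// On failure ('ne') control either falls through or branches to slow_case, depending on
// allow_fallthrough_on_failure.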
1777 
1778 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1779   Register base, Register tmp, Label &slow_case,
1780   bool allow_fallthrough_on_failure, bool one_shot)
1781 {
1782 
1783   bool fallthrough_is_success = false;
1784 
1785   assert_different_registers(oldval, newval, base, tmp);
1786 
1787 #ifdef AARCH64
1788   Label loop;
1789 
1790   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1791 
1792   bind(loop);
1793   ldxr(tmp, base);
1794   cmp(tmp, oldval);
1795   b(slow_case, ne);
1796   // MemBarReleaseLock barrier
1797   stlxr(tmp, newval, base);
1798   if (one_shot) {
1799     cmp_w(tmp, 0);
1800   } else {
1801     cbnz_w(tmp, loop);
1802     fallthrough_is_success = true;
1803   }
1804 #else
1805   // MemBarReleaseLock barrier
1806   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1807   // but that doesn't prevent a load or store from floating down between
1808   // the load and store in the CAS sequence, so play it safe and
1809   // do a full fence.
1810   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1811 
1812   if (one_shot) {
1813     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1814     cmp(tmp, oldval);
1815     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1816     cmp(tmp, 0, eq);
1817   } else {
1818     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1819   }
1820 #endif
1821   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1822     b(slow_case, ne);
1823   }
1824 
1825   // ExitEnter
1826   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1827   // barrier that follows volatile store.
1828   // TODO: Should be able to remove on armv8 if volatile loads
1829   // use the load-acquire instruction.
1830   membar(StoreLoad, noreg);
1831 }
1832 
1833 #ifndef PRODUCT
1834 
1835 // Preserves flags and all registers.
1836 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1837 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1838   if (counter_addr != NULL) {
1839     InlinedAddress counter_addr_literal((address)counter_addr);
1840     Label done, retry;
1841     if (cond != al) {
1842       b(done, inverse(cond));
1843     }
1844 
1845 #ifdef AARCH64
1846     raw_push(R0, R1);
1847     raw_push(R2, ZR);
1848 
1849     ldr_literal(R0, counter_addr_literal);
1850 
1851     bind(retry);
1852     ldxr_w(R1, R0);
1853     add_w(R1, R1, 1);
1854     stxr_w(R2, R1, R0);
1855     cbnz_w(R2, retry);
1856 
1857     raw_pop(R2, ZR);
1858     raw_pop(R0, R1);
1859 #else
1860     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1861     ldr_literal(R0, counter_addr_literal);
1862 
1863     mrs(CPSR, Rtemp);
1864 
1865     bind(retry);
1866     ldr_s32(R1, Address(R0));
1867     add(R2, R1, 1);
1868     atomic_cas_bool(R1, R2, R0, 0, R3);
1869     b(retry, ne);
1870 
1871     msr(CPSR_fsxc, Rtemp);
1872 
1873     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1874 #endif // AARCH64
1875 
1876     b(done);
1877     bind_literal(counter_addr_literal);
1878 
1879     bind(done);
1880   }
1881 }
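
// Note on the 32-bit path above (sketch of the reasoning): atomic_cas_bool clobbers the
// condition flags, so CPSR is captured via mrs before the retry loop and written back via
// msr(CPSR_fsxc, ...) afterwards; that is what allows this helper to preserve flags for a
// conditional caller.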
1882 
1883 #endif // !PRODUCT
1884 
1885 
1886 // Building block for the CAS cases of biased locking: performs the CAS and records statistics.
1887 // The slow_case label is used to transfer control if the CAS fails. Otherwise the condition codes are left set.
1888 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1889                                                  Register tmp, Label& slow_case, int* counter_addr) {
1890 
1891   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1892 #ifdef ASSERT
1893   breakpoint(ne); // Fallthrough only on success
1894 #endif
1895 #ifndef PRODUCT
1896   if (counter_addr != NULL) {
1897     cond_atomic_inc32(al, counter_addr);
1898   }
1899 #endif // !PRODUCT
1900 }
1901 
1902 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1903                                          bool swap_reg_contains_mark,
1904                                          Register tmp2,
1905                                          Label& done, Label& slow_case,
1906                                          BiasedLockingCounters* counters) {
1907   // obj_reg must be preserved (at least) if the bias locking fails
1908   // tmp_reg is a temporary register
1909   // swap_reg was used as a temporary but contained a value
1910   //   that was used afterwards in some call paths. Callers
1911   //   have been fixed so that swap_reg no longer needs to be
1912   //   saved.
1913   // Rtemp is no longer scratched
1914 
1915   assert(UseBiasedLocking, "why call this otherwise?");
1916   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1917   guarantee(swap_reg!=tmp_reg, "invariant");
1918   assert(tmp_reg != noreg, "must supply tmp_reg");
1919 
1920 #ifndef PRODUCT
1921   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1922     counters = BiasedLocking::counters();
1923   }
1924 #endif
1925 
1926   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1927   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1928 
1929   // Biased locking
1930   // See whether the lock is currently biased toward our thread and
1931   // whether the epoch is still valid
1932   // Note that the runtime guarantees sufficient alignment of JavaThread
1933   // pointers to allow age to be placed into low bits
1934   // First check to see whether biasing is even enabled for this object
1935   Label cas_label;
1936 
1937   // The null check applies to the mark load, if we need to load it.
1938   // If the mark has already been loaded in swap_reg then the null check has
1939   // already been performed and the offset is irrelevant.
1940   int null_check_offset = offset();
1941   if (!swap_reg_contains_mark) {
1942     ldr(swap_reg, mark_addr);
1943   }
1944 
1945   // On MP platforms loads could return 'stale' values in some cases.
1946   // That is acceptable since either CAS or slow case path is taken in the worst case.
1947 
1948   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1949   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1950 
1951   b(cas_label, ne);
1952 
1953   // The bias pattern is present in the object's header. Need to check
1954   // whether the bias owner and the epoch are both still current.
1955   load_klass(tmp_reg, obj_reg);
1956   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1957   orr(tmp_reg, tmp_reg, Rthread);
1958   eor(tmp_reg, tmp_reg, swap_reg);
1959 
1960 #ifdef AARCH64
1961   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1962 #else
1963   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1964 #endif // AARCH64
1965 
1966 #ifndef PRODUCT
1967   if (counters != NULL) {
1968     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1969   }
1970 #endif // !PRODUCT
1971 
1972   b(done, eq);
1973 
1974   Label try_revoke_bias;
1975   Label try_rebias;
1976 
1977   // At this point we know that the header has the bias pattern and
1978   // that we are not the bias owner in the current epoch. We need to
1979   // figure out more details about the state of the header in order to
1980   // know what operations can be legally performed on the object's
1981   // header.
1982 
1983   // If the low three bits in the xor result aren't clear, that means
1984   // the prototype header is no longer biased and we have to revoke
1985   // the bias on this object.
1986   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1987   b(try_revoke_bias, ne);
1988 
1989   // Biasing is still enabled for this data type. See whether the
1990   // epoch of the current bias is still valid, meaning that the epoch
1991   // bits of the mark word are equal to the epoch bits of the
1992   // prototype header. (Note that the prototype header's epoch bits
1993   // only change at a safepoint.) If not, attempt to rebias the object
1994   // toward the current thread. Note that we must be absolutely sure
1995   // that the current epoch is invalid in order to do this because
1996   // otherwise the manipulations it performs on the mark word are
1997   // illegal.
1998   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1999   b(try_rebias, ne);
2000 
2001   // tmp_reg has the age, epoch and pattern bits cleared
2002   // The remaining (owner) bits are (Thread ^ current_owner)
2003 
2004   // The epoch of the current bias is still valid but we know nothing
2005   // about the owner; it might be set or it might be clear. Try to
2006   // acquire the bias of the object using an atomic operation. If this
2007   // fails we will go in to the runtime to revoke the object's bias.
2008   // Note that we first construct the presumed unbiased header so we
2009   // don't accidentally blow away another thread's valid bias.
2010 
2011   // Note that we know the owner is not ourself. Hence, success can
2012   // only happen when the owner bits are 0
2013 
2014 #ifdef AARCH64
2015   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2016   // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
2017   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2018   andr(swap_reg, swap_reg, tmp2);
2019 #else
2020   // until the assembler can be made smarter, we need to make some assumptions about the values
2021   // so we can optimize this:
2022   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2023 
2024   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2025   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2026 #endif // AARCH64
2027 
2028   orr(tmp_reg, swap_reg, Rthread); // new mark
2029 
2030   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2031         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2032 
2033   // If the biasing toward our thread failed, this means that
2034   // another thread succeeded in biasing it toward itself and we
2035   // need to revoke that bias. The revocation will occur in the
2036   // interpreter runtime in the slow case.
2037 
2038   b(done);
2039 
2040   bind(try_rebias);
2041 
2042   // At this point we know the epoch has expired, meaning that the
2043   // current "bias owner", if any, is actually invalid. Under these
2044   // circumstances _only_, we are allowed to use the current header's
2045   // value as the comparison value when doing the cas to acquire the
2046   // bias in the current epoch. In other words, we allow transfer of
2047   // the bias from one thread to another directly in this situation.
2048 
2049   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2050 
2051   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2052 
2053   // owner bits 'random'. Set them to Rthread.
2054 #ifdef AARCH64
2055   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2056   andr(tmp_reg, tmp_reg, tmp2);
2057 #else
2058   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2059   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2060 #endif // AARCH64
2061 
2062   orr(tmp_reg, tmp_reg, Rthread); // new mark
2063 
2064   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2065         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2066 
2067   // If the biasing toward our thread failed, then another thread
2068   // succeeded in biasing it toward itself and we need to revoke that
2069   // bias. The revocation will occur in the runtime in the slow case.
2070 
2071   b(done);
2072 
2073   bind(try_revoke_bias);
2074 
2075   // The prototype mark in the klass doesn't have the bias bit set any
2076   // more, indicating that objects of this data type are not supposed
2077   // to be biased any more. We are going to try to reset the mark of
2078   // this object to the prototype value and fall through to the
2079   // CAS-based locking scheme. Note that if our CAS fails, it means
2080   // that another thread raced us for the privilege of revoking the
2081   // bias of this particular object, so it's okay to continue in the
2082   // normal locking code.
2083 
2084   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2085 
2086   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2087 
2088   // owner bits 'random'. Clear them
2089 #ifdef AARCH64
2090   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2091   andr(tmp_reg, tmp_reg, tmp2);
2092 #else
2093   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2094   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2095 #endif // AARCH64
2096 
2097   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2098         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2099 
2100   // Fall through to the normal CAS-based lock, because no matter what
2101   // the result of the above CAS, some thread must have succeeded in
2102   // removing the bias bit from the object's header.
2103 
2104   bind(cas_label);
2105 
2106   return null_check_offset;
2107 }
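
// For reference, the 32-bit mark word layout assumed by the masks and the lsl #23 / lsr #23
// pairs above (a sketch derived from the markOopDesc constants, not a new definition):
//   [ thread/owner : 23 | epoch : 2 | age : 4 | biased_lock : 1 | lock : 2 ]
// biased_lock_pattern (0b101) in the low three bits marks a biased header, and shifting
// left then right by 23 clears the 23 owner bits to form the CAS comparison value.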
2108 
2109 
2110 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2111   assert(UseBiasedLocking, "why call this otherwise?");
2112 
2113   // Check for biased locking unlock case, which is a no-op
2114   // Note: we do not have to check the thread ID for two reasons.
2115   // First, the interpreter checks for IllegalMonitorStateException at
2116   // a higher level. Second, if the bias was revoked while we held the
2117   // lock, the object could not be rebiased toward another thread, so
2118   // the bias bit would be clear.
2119   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2120 
2121   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2122   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2123   b(done, eq);
2124 }
2125 
2126 
2127 void MacroAssembler::resolve_jobject(Register value,
2128                                      Register tmp1,
2129                                      Register tmp2) {
2130   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2131 
2132   assert_different_registers(value, tmp1, tmp2);
2133   Label done, not_weak;
2134   cbz(value, done);             // Use NULL as-is.
2135   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2136   tbz(value, 0, not_weak);      // Test for jweak tag.
2137 
2138   // Resolve jweak.
2139   bs->load_at(this, IN_ROOT | ON_PHANTOM_OOP_REF, T_OBJECT,
2140               value, Address(value, -JNIHandles::weak_tag_value), tmp1, tmp2, noreg);
2141   b(done);
2142   bind(not_weak);
2143   // Resolve (untagged) jobject.
2144   bs->load_at(this, IN_ROOT | IN_CONCURRENT_ROOT, T_OBJECT,
2145               value, Address(value, 0), tmp1, tmp2, noreg);
2146   verify_oop(value);
2147   bind(done);
2148 }
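
// JNI handle tagging assumed above (sketch): a jobject is the address of an oop slot, and
// jweak handles have the low bit set (JNIHandles::weak_tag_value == 1). A weak handle is
// therefore loaded from (value - 1) with an ON_PHANTOM_OOP_REF barrier, while a strong
// handle is loaded directly.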
2149 
2150 
2151 //////////////////////////////////////////////////////////////////////////////////
2152 
2153 #ifdef AARCH64
2154 
2155 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2156   switch (size_in_bytes) {
2157     case  8: ldr(dst, src); break;
2158     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2159     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2160     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2161     default: ShouldNotReachHere();
2162   }
2163 }
2164 
2165 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2166   switch (size_in_bytes) {
2167     case  8: str(src, dst);    break;
2168     case  4: str_32(src, dst); break;
2169     case  2: strh(src, dst);   break;
2170     case  1: strb(src, dst);   break;
2171     default: ShouldNotReachHere();
2172   }
2173 }
2174 
2175 #else
2176 
2177 void MacroAssembler::load_sized_value(Register dst, Address src,
2178                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2179   switch (size_in_bytes) {
2180     case  4: ldr(dst, src, cond); break;
2181     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2182     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2183     default: ShouldNotReachHere();
2184   }
2185 }
2186 
2187 
2188 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2189   switch (size_in_bytes) {
2190     case  4: str(src, dst, cond); break;
2191     case  2: strh(src, dst, cond);   break;
2192     case  1: strb(src, dst, cond);   break;
2193     default: ShouldNotReachHere();
2194   }
2195 }
2196 #endif // AARCH64
2197 
2198 // Look up the method for a megamorphic invokeinterface call.
2199 // The target method is determined by <Rinterf, Rindex>.
2200 // The receiver klass is in Rklass.
2201 // On success, the result will be in method_result, and execution falls through.
2202 // On failure, execution transfers to the given label.
2203 void MacroAssembler::lookup_interface_method(Register Rklass,
2204                                              Register Rintf,
2205                                              RegisterOrConstant itable_index,
2206                                              Register method_result,
2207                                              Register Rscan,
2208                                              Register Rtmp,
2209                                              Label& L_no_such_interface) {
2210 
2211   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2212 
2213   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2214   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2215 
2216   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2217   const int base = in_bytes(Klass::vtable_start_offset());
2218   const int scale = exact_log2(vtableEntry::size_in_bytes());
2219   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2220   add(Rscan, Rklass, base);
2221   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2222 
2223   // Search through the itable for an interface equal to incoming Rintf
2224   // itable looks like [interface][offset][interface][offset][interface][offset]
2225 
2226   Label loop;
2227   bind(loop);
2228   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2229 #ifdef AARCH64
2230   Label found;
2231   cmp(Rtmp, Rintf);
2232   b(found, eq);
2233   cbnz(Rtmp, loop);
2234 #else
2235   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2236   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2237   b(loop, ne);
2238 #endif // AARCH64
2239 
2240 #ifdef AARCH64
2241   b(L_no_such_interface);
2242   bind(found);
2243 #else
2244   // CF == 0 means we reached the end of itable without finding icklass
2245   b(L_no_such_interface, cc);
2246 #endif // !AARCH64
2247 
2248   if (method_result != noreg) {
2249     // Interface found at previous position of Rscan, now load the method
2250     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2251     if (itable_index.is_register()) {
2252       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2253       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2254       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2255       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2256     } else {
2257       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2258                           itableMethodEntry::method_offset_in_bytes();
2259       add_slow(method_result, Rklass, method_offset);
2260       ldr(method_result, Address(method_result, Rtmp));
2261     }
2262   }
2263 }
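
// Layout walked by the scan above (illustrative sketch):
//   Rklass --> [ Klass fields | vtable: vtable_length entries |
//                itableOffsetEntry { interface, offset } ... (a NULL interface terminates) |
//                itable method tables: Method* entries ]
// Once the matching interface entry is found, its 'offset' field gives the byte offset from
// Rklass to that interface's method table, which is then indexed by itable_index.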
2264 
2265 #ifdef COMPILER2
2266 // TODO: 8 bytes at a time? pre-fetch?
2267 // Compare char[] arrays aligned to 4 bytes.
2268 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2269                                         Register limit, Register result,
2270                                       Register chr1, Register chr2, Label& Ldone) {
2271   Label Lvector, Lloop;
2272 
2273   // Note: limit contains number of bytes (2*char_elements) != 0.
2274   tst(limit, 0x2); // trailing character ?
2275   b(Lvector, eq);
2276 
2277   // compare the trailing char
2278   sub(limit, limit, sizeof(jchar));
2279   ldrh(chr1, Address(ary1, limit));
2280   ldrh(chr2, Address(ary2, limit));
2281   cmp(chr1, chr2);
2282   mov(result, 0, ne);     // not equal
2283   b(Ldone, ne);
2284 
2285   // only one char ?
2286   tst(limit, limit);
2287   mov(result, 1, eq);
2288   b(Ldone, eq);
2289 
2290   // word by word compare, don't need alignment check
2291   bind(Lvector);
2292 
2293   // Shift ary1 and ary2 to the end of the arrays, negate limit
2294   add(ary1, limit, ary1);
2295   add(ary2, limit, ary2);
2296   neg(limit, limit);
2297 
2298   bind(Lloop);
2299   ldr_u32(chr1, Address(ary1, limit));
2300   ldr_u32(chr2, Address(ary2, limit));
2301   cmp_32(chr1, chr2);
2302   mov(result, 0, ne);     // not equal
2303   b(Ldone, ne);
2304   adds(limit, limit, 2*sizeof(jchar));
2305   b(Lloop, ne);
2306 
2307   // Caller should set it:
2308   // mov(result, 1);  // equal
2309 }
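
// Shape of the comparison above (sketch): when the byte count is not a multiple of 4 the
// trailing jchar is compared first; the rest is compared one 32-bit word (two jchars) per
// iteration, walking the negated limit up to zero. result is cleared on the first mismatch,
// and the caller supplies the final mov(result, 1) for the all-equal case.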
2310 #endif
2311 
2312 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2313   mov_slow(tmpreg1, counter_addr);
2314   ldr_s32(tmpreg2, tmpreg1);
2315   add_32(tmpreg2, tmpreg2, 1);
2316   str_32(tmpreg2, tmpreg1);
2317 }
2318 
2319 void MacroAssembler::floating_cmp(Register dst) {
2320 #ifdef AARCH64
2321   NOT_TESTED();
2322   cset(dst, gt);            // 1 if '>', else 0
2323   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2324 #else
2325   vmrs(dst, FPSCR);
2326   orr(dst, dst, 0x08000000);
2327   eor(dst, dst, AsmOperand(dst, lsl, 3));
2328   mov(dst, AsmOperand(dst, asr, 30));
2329 #endif
2330 }
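
// Sketch of the 32-bit bit trick above, assuming FPSCR holds the NZCV flags of a preceding
// vcmp (read by the vmrs): setting bit 27 and xoring the value with itself shifted left by 3
// arranges the top two bits so that 'asr #30' yields -1 for less-than or unordered, 0 for
// equal, and +1 for greater-than.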
2331 
2332 void MacroAssembler::restore_default_fp_mode() {
2333 #ifdef AARCH64
2334   msr(SysReg_FPCR, ZR);
2335 #else
2336 #ifndef __SOFTFP__
2337   // Round to Near mode, IEEE compatible, masked exceptions
2338   mov(Rtemp, 0);
2339   vmsr(FPSCR, Rtemp);
2340 #endif // !__SOFTFP__
2341 #endif // AARCH64
2342 }
2343 
2344 #ifndef AARCH64
2345 // 24-bit word range == 26-bit byte range
2346 bool check26(int offset) {
2347   // this could be simplified, but it mimics encoding and decoding
2348   // an actual branch instruction
2349   int off1 = offset << 6 >> 8;
2350   int encoded = off1 & ((1<<24)-1);
2351   int decoded = encoded << 8 >> 6;
2352   return offset == decoded;
2353 }
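
// Examples, for illustration: check26(0x1fffffc) is true since +32MB-4 survives the 24-bit
// word-offset encoding, while check26(0x2000000) and any offset that is not a multiple of 4
// come back false, matching the +/-32 MB reach of an ARM B/BL instruction.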
2354 #endif // !AARCH64
2355 
2356 // Perform some slight adjustments so the default 32MB code cache
2357 // is fully reachable.
2358 static inline address first_cache_address() {
2359   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2360 }
2361 static inline address last_cache_address() {
2362   return CodeCache::high_bound() - Assembler::InstructionSize;
2363 }
2364 
2365 #ifdef AARCH64
2366 // Can we reach target using ADRP?
2367 bool MacroAssembler::page_reachable_from_cache(address target) {
2368   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2369   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2370   intptr_t addr = (intptr_t)target & ~0xfff;
2371 
2372   intptr_t loffset = addr - cl;
2373   intptr_t hoffset = addr - ch;
2374   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2375 }
2376 #endif
2377 
2378 // Can we reach target using unconditional branch or call from anywhere
2379 // in the code cache (because code can be relocated)?
2380 bool MacroAssembler::_reachable_from_cache(address target) {
2381 #ifdef __thumb__
2382   if ((1 & (intptr_t)target) != 0) {
2383     // Return false to avoid 'b' if we would need to switch to THUMB mode.
2384     return false;
2385   }
2386 #endif
2387 
2388   address cl = first_cache_address();
2389   address ch = last_cache_address();
2390 
2391   if (ForceUnreachable) {
2392     // Only addresses from CodeCache can be treated as reachable.
2393     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2394       return false;
2395     }
2396   }
2397 
2398   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2399   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2400 
2401 #ifdef AARCH64
2402   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2403 #else
2404   return check26(loffset - 8) && check26(hoffset - 8);
2405 #endif
2406 }
2407 
2408 bool MacroAssembler::reachable_from_cache(address target) {
2409   assert(CodeCache::contains(pc()), "not supported");
2410   return _reachable_from_cache(target);
2411 }
2412 
2413 // Can we reach the entire code cache from anywhere else in the code cache?
2414 bool MacroAssembler::_cache_fully_reachable() {
2415   address cl = first_cache_address();
2416   address ch = last_cache_address();
2417   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2418 }
2419 
2420 bool MacroAssembler::cache_fully_reachable() {
2421   assert(CodeCache::contains(pc()), "not supported");
2422   return _cache_fully_reachable();
2423 }
2424 
2425 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2426   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2427   if (reachable_from_cache(target)) {
2428     relocate(rtype);
2429     b(target NOT_AARCH64_ARG(cond));
2430     return;
2431   }
2432 
2433   // Note: relocate is not needed for the code below,
2434   // encoding targets in absolute format.
2435   if (ignore_non_patchable_relocations()) {
2436     rtype = relocInfo::none;
2437   }
2438 
2439 #ifdef AARCH64
2440   assert (scratch != noreg, "should be specified");
2441   InlinedAddress address_literal(target, rtype);
2442   ldr_literal(scratch, address_literal);
2443   br(scratch);
2444   int off = offset();
2445   bind_literal(address_literal);
2446 #ifdef COMPILER2
2447   if (offset() - off == wordSize) {
2448     // no padding, so insert nop for worst-case sizing
2449     nop();
2450   }
2451 #endif
2452 #else
2453   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2454     // Note: this version cannot be (atomically) patched
2455     mov_slow(scratch, (intptr_t)target, cond);
2456     bx(scratch, cond);
2457   } else {
2458     Label skip;
2459     InlinedAddress address_literal(target);
2460     if (cond != al) {
2461       b(skip, inverse(cond));
2462     }
2463     relocate(rtype);
2464     ldr_literal(PC, address_literal);
2465     bind_literal(address_literal);
2466     bind(skip);
2467   }
2468 #endif // AARCH64
2469 }
2470 
2471 // Similar to jump except that:
2472 // - near calls are valid only if any destination in the cache is near
2473 // - no movt/movw (not atomically patchable)
2474 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2475   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2476   if (cache_fully_reachable()) {
2477     // Note: this assumes that all possible targets (the initial one
2478     // and the addresses patched to) are all in the code cache.
2479     assert(CodeCache::contains(target), "target might be too far");
2480     relocate(rtype);
2481     b(target NOT_AARCH64_ARG(cond));
2482     return;
2483   }
2484 
2485   // Discard the relocation information if not needed for CacheCompiledCode
2486   // since the next encodings are all in absolute format.
2487   if (ignore_non_patchable_relocations()) {
2488     rtype = relocInfo::none;
2489   }
2490 
2491 #ifdef AARCH64
2492   assert (scratch != noreg, "should be specified");
2493   InlinedAddress address_literal(target);
2494   relocate(rtype);
2495   ldr_literal(scratch, address_literal);
2496   br(scratch);
2497   int off = offset();
2498   bind_literal(address_literal);
2499 #ifdef COMPILER2
2500   if (offset() - off == wordSize) {
2501     // no padding, so insert nop for worst-case sizing
2502     nop();
2503   }
2504 #endif
2505 #else
2506   {
2507     Label skip;
2508     InlinedAddress address_literal(target);
2509     if (cond != al) {
2510       b(skip, inverse(cond));
2511     }
2512     relocate(rtype);
2513     ldr_literal(PC, address_literal);
2514     bind_literal(address_literal);
2515     bind(skip);
2516   }
2517 #endif // AARCH64
2518 }
2519 
2520 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2521   Register scratch = LR;
2522   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2523   if (reachable_from_cache(target)) {
2524     relocate(rspec);
2525     bl(target NOT_AARCH64_ARG(cond));
2526     return;
2527   }
2528 
2529   // Note: relocate is not needed for the code below,
2530   // encoding targets in absolute format.
2531   if (ignore_non_patchable_relocations()) {
2532     // This assumes the information was needed only for relocating the code.
2533     rspec = RelocationHolder::none;
2534   }
2535 
2536 #ifndef AARCH64
2537   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2538     // Note: this version cannot be (atomically) patched
2539     mov_slow(scratch, (intptr_t)target, cond);
2540     blx(scratch, cond);
2541     return;
2542   }
2543 #endif
2544 
2545   {
2546     Label ret_addr;
2547 #ifndef AARCH64
2548     if (cond != al) {
2549       b(ret_addr, inverse(cond));
2550     }
2551 #endif
2552 
2553 
2554 #ifdef AARCH64
2555     // TODO-AARCH64: make more optimal implementation
2556     // [ Keep in sync with MacroAssembler::call_size ]
2557     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2558     mov_slow(scratch, target);
2559     blr(scratch);
2560 #else
2561     InlinedAddress address_literal(target);
2562     relocate(rspec);
2563     adr(LR, ret_addr);
2564     ldr_literal(PC, address_literal);
2565 
2566     bind_literal(address_literal);
2567     bind(ret_addr);
2568 #endif
2569   }
2570 }
2571 
2572 #if defined(AARCH64) && defined(COMPILER2)
2573 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2574   // FIXME: mov_slow is variable-length
2575   if (!far) return 1; // bl
2576   if (patchable) return 2;  // ldr; blr
2577   return instr_count_for_mov_slow((intptr_t)target) + 1;
2578 }
2579 #endif
2580 
2581 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2582   assert(rspec.type() == relocInfo::static_call_type ||
2583          rspec.type() == relocInfo::none ||
2584          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2585 
2586   // Always generate the relocation information, needed for patching
2587   relocate(rspec); // used by NativeCall::is_call_before()
2588   if (cache_fully_reachable()) {
2589     // Note: this assumes that all possible targets (the initial one
2590     // and the addresses patched to) are all in the code cache.
2591     assert(CodeCache::contains(target), "target might be too far");
2592     bl(target);
2593   } else {
2594 #if defined(AARCH64) && defined(COMPILER2)
2595     if (c2) {
2596       // return address needs to match call_size().
2597       // no need to trash Rtemp
2598       int off = offset();
2599       Label skip_literal;
2600       InlinedAddress address_literal(target);
2601       ldr_literal(LR, address_literal);
2602       blr(LR);
2603       int ret_addr_offset = offset();
2604       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2605       b(skip_literal);
2606       int off2 = offset();
2607       bind_literal(address_literal);
2608       if (offset() - off2 == wordSize) {
2609         // no padding, so insert nop for worst-case sizing
2610         nop();
2611       }
2612       bind(skip_literal);
2613       return ret_addr_offset;
2614     }
2615 #endif
2616     Label ret_addr;
2617     InlinedAddress address_literal(target);
2618 #ifdef AARCH64
2619     ldr_literal(Rtemp, address_literal);
2620     adr(LR, ret_addr);
2621     br(Rtemp);
2622 #else
2623     adr(LR, ret_addr);
2624     ldr_literal(PC, address_literal);
2625 #endif
2626     bind_literal(address_literal);
2627     bind(ret_addr);
2628   }
2629   return offset();
2630 }
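
// For reference, the far patchable call emitted above takes one of these shapes
// (sketch; the inlined literal holds the target address and is what gets patched):
//   AArch64:       ldr Rtemp, <literal>;  adr LR, ret_addr;  br Rtemp
//   AArch64 (C2):  ldr LR, <literal>;     blr LR;            b skip_literal
//   ARM32:         adr LR, ret_addr;      ldr PC, <literal>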
2631 
2632 // ((OopHandle)result).resolve();
2633 void MacroAssembler::resolve_oop_handle(Register result) {
2634   // OopHandle::resolve is an indirection.
2635   ldr(result, Address(result, 0));
2636 }
2637 
2638 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2639   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2640   ldr(tmp, Address(method, Method::const_offset()));
2641   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2642   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2643   ldr(mirror, Address(tmp, mirror_offset));
2644   resolve_oop_handle(mirror);
2645 }
2646 
2647 
2648 ///////////////////////////////////////////////////////////////////////////////
2649 
2650 // Compressed pointers
2651 
2652 #ifdef AARCH64
2653 
2654 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2655   if (UseCompressedClassPointers) {
2656     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2657     decode_klass_not_null(dst_klass);
2658   } else {
2659     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2660   }
2661 }
2662 
2663 #else
2664 
2665 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2666   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2667 }
2668 
2669 #endif // AARCH64
2670 
2671 // Blows src_klass.
2672 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2673 #ifdef AARCH64
2674   if (UseCompressedClassPointers) {
2675     assert(src_klass != dst_oop, "not enough registers");
2676     encode_klass_not_null(src_klass);
2677     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2678     return;
2679   }
2680 #endif // AARCH64
2681   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2682 }
2683 
2684 #ifdef AARCH64
2685 
2686 void MacroAssembler::store_klass_gap(Register dst) {
2687   if (UseCompressedClassPointers) {
2688     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2689   }
2690 }
2691 
2692 #endif // AARCH64
2693 
2694 
2695 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2696   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
2697 }
2698 
2699 // Blows src and flags.
2700 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2701   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2702 }
2703 
2704 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2705   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2706 }
2707 
2708 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2709                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2710   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2711   bool as_raw = (decorators & AS_RAW) != 0;
2712   if (as_raw) {
2713     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2714   } else {
2715     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2716   }
2717 }
2718 
2719 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2720                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2721   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2722   bool as_raw = (decorators & AS_RAW) != 0;
2723   if (as_raw) {
2724     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2725   } else {
2726     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2727   }
2728 }
2729 
2730 
2731 #ifdef AARCH64
2732 
2733 // Algorithm must match oop.inline.hpp encode_heap_oop.
2734 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2735   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2736   // Update it when modifying this code.
2737   assert (UseCompressedOops, "must be compressed");
2738   assert (Universe::heap() != NULL, "java heap should be initialized");
2739 #ifdef ASSERT
2740   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2741 #endif
2742   verify_oop(src);
2743   if (Universe::narrow_oop_base() == NULL) {
2744     if (Universe::narrow_oop_shift() != 0) {
2745       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2746       _lsr(dst, src, Universe::narrow_oop_shift());
2747     } else if (dst != src) {
2748       mov(dst, src);
2749     }
2750   } else {
2751     tst(src, src);
2752     csel(dst, Rheap_base, src, eq);
2753     sub(dst, dst, Rheap_base);
2754     if (Universe::narrow_oop_shift() != 0) {
2755       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2756       _lsr(dst, dst, Universe::narrow_oop_shift());
2757     }
2758   }
2759 }
2760 
2761 // Same algorithm as oop.inline.hpp decode_heap_oop.
2762 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2763 #ifdef ASSERT
2764   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2765 #endif
2766   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2767   if (Universe::narrow_oop_base() != NULL) {
2768     tst(src, src);
2769     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2770     csel(dst, dst, ZR, ne);
2771   } else {
2772     _lsl(dst, src, Universe::narrow_oop_shift());
2773   }
2774   verify_oop(dst);
2775 }
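
// Compressed-oop mapping implemented above (sketch): with base B = Universe::narrow_oop_base()
// and shift s = Universe::narrow_oop_shift(),
//   encode(oop)  = (oop == NULL) ? 0    : (oop - B) >> s
//   decode(noop) = (noop == 0)   ? NULL : B + ((uintptr_t)noop << s)
// The csel against Rheap_base / ZR keeps the NULL special case branch-free.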
2776 
2777 #ifdef COMPILER2
2778 // Algorithm must match oop.inline.hpp encode_heap_oop.
2779 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2780 // must be changed.
2781 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2782   assert (UseCompressedOops, "must be compressed");
2783   assert (Universe::heap() != NULL, "java heap should be initialized");
2784 #ifdef ASSERT
2785   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2786 #endif
2787   verify_oop(src);
2788   if (Universe::narrow_oop_base() == NULL) {
2789     if (Universe::narrow_oop_shift() != 0) {
2790       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2791       _lsr(dst, src, Universe::narrow_oop_shift());
2792     } else if (dst != src) {
2793           mov(dst, src);
2794     }
2795   } else {
2796     sub(dst, src, Rheap_base);
2797     if (Universe::narrow_oop_shift() != 0) {
2798       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2799       _lsr(dst, dst, Universe::narrow_oop_shift());
2800     }
2801   }
2802 }
2803 
2804 // Same algorithm as oop.inline.hpp decode_heap_oop.
2805 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2806 // must be changed.
2807 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2808 #ifdef ASSERT
2809   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2810 #endif
2811   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2812   if (Universe::narrow_oop_base() != NULL) {
2813     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2814   } else {
2815     _lsl(dst, src, Universe::narrow_oop_shift());
2816   }
2817   verify_oop(dst);
2818 }
2819 
2820 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2821   assert(UseCompressedClassPointers, "should only be used for compressed header");
2822   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2823   int klass_index = oop_recorder()->find_index(k);
2824   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2825 
2826   // Relocation with special format (see relocInfo_arm.hpp).
2827   relocate(rspec);
2828   narrowKlass encoded_k = Klass::encode_klass(k);
2829   movz(dst, encoded_k & 0xffff, 0);
2830   movk(dst, (encoded_k >> 16) & 0xffff, 16);
2831 }
2832 
2833 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2834   assert(UseCompressedOops, "should only be used for compressed header");
2835   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2836   int oop_index = oop_recorder()->find_index(obj);
2837   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2838 
2839   relocate(rspec);
2840   movz(dst, 0xffff, 0);
2841   movk(dst, 0xffff, 16);
2842 }
2843 
2844 #endif // COMPILER2
2845 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2846 // must be changed.
2847 void MacroAssembler::encode_klass_not_null(Register r) {
2848   if (Universe::narrow_klass_base() != NULL) {
2849     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2850     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
2851     mov_slow(Rheap_base, Universe::narrow_klass_base());
2852     sub(r, r, Rheap_base);
2853   }
2854   if (Universe::narrow_klass_shift() != 0) {
2855     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2856     _lsr(r, r, Universe::narrow_klass_shift());
2857   }
2858   if (Universe::narrow_klass_base() != NULL) {
2859     reinit_heapbase();
2860   }
2861 }
2862 
2863 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2864 // must be changed.
2865 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2866   if (dst == src) {
2867     encode_klass_not_null(src);
2868     return;
2869   }
2870   if (Universe::narrow_klass_base() != NULL) {
2871     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
2872     sub(dst, src, dst);
2873     if (Universe::narrow_klass_shift() != 0) {
2874       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2875       _lsr(dst, dst, Universe::narrow_klass_shift());
2876     }
2877   } else {
2878     if (Universe::narrow_klass_shift() != 0) {
2879       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2880       _lsr(dst, src, Universe::narrow_klass_shift());
2881     } else {
2882       mov(dst, src);
2883     }
2884   }
2885 }
2886 
2887 // Function instr_count_for_decode_klass_not_null() counts the instructions
2888 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
2889 // when (Universe::heap() != NULL).  Hence, if the instructions they
2890 // generate change, then this method needs to be updated.
2891 int MacroAssembler::instr_count_for_decode_klass_not_null() {
2892   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
2893   assert(Universe::heap() != NULL, "java heap should be initialized");
2894   if (Universe::narrow_klass_base() != NULL) {
2895     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
2896       1 +                                                                 // add
2897       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
2898   } else {
2899     if (Universe::narrow_klass_shift() != 0) {
2900       return 1;
2901     }
2902   }
2903   return 0;
2904 }
2905 
2906 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2907 // must be changed.
2908 void MacroAssembler::decode_klass_not_null(Register r) {
2909   int off = offset();
2910   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2911   assert(Universe::heap() != NULL, "java heap should be initialized");
2912   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
2913   // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
2914   // Also do not verify_oop as this is called by verify_oop.
2915   if (Universe::narrow_klass_base() != NULL) {
2916     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
2917     mov_slow(Rheap_base, Universe::narrow_klass_base());
2918     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
2919     reinit_heapbase();
2920   } else {
2921     if (Universe::narrow_klass_shift() != 0) {
2922       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2923       _lsl(r, r, Universe::narrow_klass_shift());
2924     }
2925   }
2926   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
2927 }
2928 
2929 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2930 // must be changed.
2931 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2932   if (src == dst) {
2933     decode_klass_not_null(src);
2934     return;
2935   }
2936 
2937   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2938   assert(Universe::heap() != NULL, "java heap should be initialized");
2939   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
2940   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
2941   // Also do not verify_oop as this is called by verify_oop.
2942   if (Universe::narrow_klass_base() != NULL) {
2943     mov_slow(dst, Universe::narrow_klass_base());
2944     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
2945   } else {
2946     _lsl(dst, src, Universe::narrow_klass_shift());
2947   }
2948 }
2949 
2950 
2951 void MacroAssembler::reinit_heapbase() {
2952   if (UseCompressedOops || UseCompressedClassPointers) {
2953     if (Universe::heap() != NULL) {
2954       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
2955     } else {
2956       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
2957     }
2958   }
2959 }
2960 
2961 #ifdef ASSERT
2962 void MacroAssembler::verify_heapbase(const char* msg) {
2963   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
2964   // Update it when modifying this code.
2965   assert (UseCompressedOops, "should be compressed");
2966   assert (Universe::heap() != NULL, "java heap should be initialized");
2967   if (CheckCompressedOops) {
2968     Label ok;
2969     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2970     raw_push(Rtemp, ZR);
2971     mrs(Rtemp, Assembler::SysReg_NZCV);
2972     str(Rtemp, Address(SP, 1 * wordSize));
2973     mov_slow(Rtemp, Universe::narrow_ptrs_base());
2974     cmp(Rheap_base, Rtemp);
2975     b(ok, eq);
2976     stop(msg);
2977     bind(ok);
2978     ldr(Rtemp, Address(SP, 1 * wordSize));
2979     msr(Assembler::SysReg_NZCV, Rtemp);
2980     raw_pop(Rtemp, ZR);
2981     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
2982   }
2983 }
2984 #endif // ASSERT
2985 
2986 #endif // AARCH64
2987 
2988 #ifdef COMPILER2
2989 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
2990 {
2991   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
2992 
2993   Register Rmark      = Rscratch2;
2994 
2995   assert(Roop != Rscratch, "");
2996   assert(Roop != Rmark, "");
2997   assert(Rbox != Rscratch, "");
2998   assert(Rbox != Rmark, "");
2999 
3000   Label fast_lock, done;
3001 
3002   if (UseBiasedLocking && !UseOptoBiasInlining) {
3003     Label failed;
3004 #ifdef AARCH64
3005     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
3006 #else
3007     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3008 #endif
3009     bind(failed);
3010   }
3011 
3012   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3013   tst(Rmark, markOopDesc::unlocked_value);
3014   b(fast_lock, ne);
3015 
3016   // Check for recursive lock
3017   // See comments in InterpreterMacroAssembler::lock_object for
3018   // explanations on the fast recursive locking check.
3019 #ifdef AARCH64
3020   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3021   Assembler::LogicalImmediate imm(mask, false);
3022   mov(Rscratch, SP);
3023   sub(Rscratch, Rmark, Rscratch);
3024   ands(Rscratch, Rscratch, imm);
3025   b(done, ne); // exit with failure
3026   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
3027   b(done);
3028 
3029 #else
3030   // -1- test low 2 bits
3031   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3032   // -2- test (hdr - SP) if the low two bits are 0
3033   sub(Rscratch, Rmark, SP, eq);
3034   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3035   // If still 'eq' then recursive locking OK
3036   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
3037   b(done);
3038 #endif
3039 
3040   bind(fast_lock);
3041   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3042 
3043   bool allow_fallthrough_on_failure = true;
3044   bool one_shot = true;
3045   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3046 
3047   bind(done);
3048 
3049 }
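
// Fast-path structure of fast_lock above (sketch):
//   1. optional biased-locking enter (reaches done on success);
//   2. if the mark word has the unlocked bit set, save it as the displaced header in Rbox
//      and CAS the address of Rbox into the object header;
//   3. otherwise, if the mark looks like an address within a page of SP (low two bits clear),
//      treat it as a recursive stack lock and store 0 as the displaced header;
//   4. at done, 'eq' means the lock was acquired inline and 'ne' sends the caller to the
//      runtime slow path.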
3050 
3051 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
3052 {
3053   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3054 
3055   Register Rmark      = Rscratch2;
3056 
3057   assert(Roop != Rscratch, "");
3058   assert(Roop != Rmark, "");
3059   assert(Rbox != Rscratch, "");
3060   assert(Rbox != Rmark, "");
3061 
3062   Label done;
3063 
3064   if (UseBiasedLocking && !UseOptoBiasInlining) {
3065     biased_locking_exit(Roop, Rscratch, done);
3066   }
3067 
3068   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3069   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3070   cmp(Rmark, 0);
3071   b(done, eq);
3072 
3073   // Restore the object header
3074   bool allow_fallthrough_on_failure = true;
3075   bool one_shot = true;
3076   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3077 
3078   bind(done);
3079 
3080 }
3081 #endif // COMPILER2