1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/cardTableBarrierSet.hpp"
  35 #include "gc/shared/collectedHeap.inline.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "oops/klass.inline.hpp"
  39 #include "prims/methodHandles.hpp"
  40 #include "runtime/biasedLocking.hpp"
  41 #include "runtime/interfaceSupport.inline.hpp"
  42 #include "runtime/objectMonitor.hpp"
  43 #include "runtime/os.hpp"
  44 #include "runtime/sharedRuntime.hpp"
  45 #include "runtime/stubRoutines.hpp"
  46 #include "utilities/macros.hpp"
  47 #if INCLUDE_G1GC
  48 #include "gc/g1/g1BarrierSet.hpp"
  49 #include "gc/g1/g1CardTable.hpp"
  50 #include "gc/g1/g1ThreadLocalData.hpp"
  51 #include "gc/g1/heapRegion.hpp"
  52 #endif
  53 
  54 // Implementation of AddressLiteral
  55 
  56 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  57   switch (rtype) {
  58   case relocInfo::oop_type:
  59     // Oops are a special case. Normally they would be their own section
  60     // but in cases like icBuffer they are literals in the code stream that
  61     // we don't have a section for. We use none so that we get a literal address
  62     // which is always patchable.
  63     break;
  64   case relocInfo::external_word_type:
  65     _rspec = external_word_Relocation::spec(_target);
  66     break;
  67   case relocInfo::internal_word_type:
  68     _rspec = internal_word_Relocation::spec(_target);
  69     break;
  70   case relocInfo::opt_virtual_call_type:
  71     _rspec = opt_virtual_call_Relocation::spec();
  72     break;
  73   case relocInfo::static_call_type:
  74     _rspec = static_call_Relocation::spec();
  75     break;
  76   case relocInfo::runtime_call_type:
  77     _rspec = runtime_call_Relocation::spec();
  78     break;
  79   case relocInfo::poll_type:
  80   case relocInfo::poll_return_type:
  81     _rspec = Relocation::spec_simple(rtype);
  82     break;
  83   case relocInfo::none:
  84     break;
  85   default:
  86     ShouldNotReachHere();
  87     break;
  88   }
  89 }
  90 
  91 // Initially added to the Assembler interface as a pure virtual:
  92 //   RegisterConstant delayed_value(..)
  93 // for:
  94 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
  95 // this was subsequently modified to its present name and return type
  96 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  97                                                       Register tmp,
  98                                                       int offset) {
  99   ShouldNotReachHere();
 100   return RegisterOrConstant(-1);
 101 }
 102 
 103 
 104 #ifdef AARCH64
 105 // Note: ARM32 version is OS dependent
 106 void MacroAssembler::breakpoint(AsmCondition cond) {
 107   if (cond == al) {
 108     brk();
 109   } else {
 110     Label L;
 111     b(L, inverse(cond));
 112     brk();
 113     bind(L);
 114   }
 115 }
 116 #endif // AARCH64
 117 
 118 
 119 // virtual method calling
 120 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 121                                            Register vtable_index,
 122                                            Register method_result) {
 123   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 124   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
 125   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 126   ldr(method_result, Address(recv_klass, base_offset));
 127 }
 128 
 129 
 130 // Simplified, combined version, good for typical uses.
 131 // Falls through on failure.
 132 void MacroAssembler::check_klass_subtype(Register sub_klass,
 133                                          Register super_klass,
 134                                          Register temp_reg,
 135                                          Register temp_reg2,
 136                                          Register temp_reg3,
 137                                          Label& L_success) {
 138   Label L_failure;
 139   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 140   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 141   bind(L_failure);
}
 143 
 144 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 145                                                    Register super_klass,
 146                                                    Register temp_reg,
 147                                                    Register temp_reg2,
 148                                                    Label* L_success,
 149                                                    Label* L_failure,
 150                                                    Label* L_slow_path) {
 151 
 152   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 153   const Register super_check_offset = temp_reg2;
 154 
 155   Label L_fallthrough;
 156   int label_nulls = 0;
 157   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 158   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 159   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 160   assert(label_nulls <= 1, "at most one NULL in the batch");
 161 
 162   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 163   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 164   Address super_check_offset_addr(super_klass, sco_offset);
 165 
 166   // If the pointers are equal, we are done (e.g., String[] elements).
 167   // This self-check enables sharing of secondary supertype arrays among
 168   // non-primary types such as array-of-interface.  Otherwise, each such
 169   // type would need its own customized SSA.
 170   // We move this check to the front of the fast path because many
 171   // type checks are in fact trivially successful in this manner,
 172   // so we get a nicely predicted branch right at the start of the check.
 173   cmp(sub_klass, super_klass);
 174   b(*L_success, eq);
 175 
 176   // Check the supertype display:
 177   ldr_u32(super_check_offset, super_check_offset_addr);
 178 
 179   Address super_check_addr(sub_klass, super_check_offset);
 180   ldr(temp_reg, super_check_addr);
 181   cmp(super_klass, temp_reg); // load displayed supertype
 182 
 183   // This check has worked decisively for primary supers.
 184   // Secondary supers are sought in the super_cache ('super_cache_addr').
 185   // (Secondary supers are interfaces and very deeply nested subtypes.)
 186   // This works in the same check above because of a tricky aliasing
 187   // between the super_cache and the primary super display elements.
 188   // (The 'super_check_addr' can address either, as the case requires.)
 189   // Note that the cache is updated below if it does not help us find
 190   // what we need immediately.
 191   // So if it was a primary super, we can just fail immediately.
 192   // Otherwise, it's the slow path for us (no success at this point).
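  //
  // For illustration (an informal sketch, not generated code): for a hypothetical
  // class C extending B extending Object, the primary super display of C holds
  // [Object, B, C] at fixed slots, and B's super_check_offset points at the slot
  // where B must appear; the single ldr/cmp above therefore decides most
  // class-vs-class checks, while interfaces typically go through the secondary
  // super cache and the slow path.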
 193 
 194   b(*L_success, eq);
 195   cmp_32(super_check_offset, sc_offset);
 196   if (L_failure == &L_fallthrough) {
 197     b(*L_slow_path, eq);
 198   } else {
 199     b(*L_failure, ne);
 200     if (L_slow_path != &L_fallthrough) {
 201       b(*L_slow_path);
 202     }
 203   }
 204 
 205   bind(L_fallthrough);
 206 }
 207 
 208 
 209 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 210                                                    Register super_klass,
 211                                                    Register temp_reg,
 212                                                    Register temp2_reg,
 213                                                    Register temp3_reg,
 214                                                    Label* L_success,
 215                                                    Label* L_failure,
 216                                                    bool set_cond_codes) {
 217 #ifdef AARCH64
 218   NOT_IMPLEMENTED();
 219 #else
 220   // Note: if used by code that expects a register to be 0 on success,
 221   // this register must be temp_reg and set_cond_codes must be true
 222 
 223   Register saved_reg = noreg;
 224 
  // get an additional tmp register if one was not supplied
 226   if (temp3_reg == noreg) {
 227     saved_reg = temp3_reg = LR;
 228     push(saved_reg);
 229   }
 230 
 231   assert(temp2_reg != noreg, "need all the temporary registers");
 232   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 233 
 234   Register cmp_temp = temp_reg;
 235   Register scan_temp = temp3_reg;
 236   Register count_temp = temp2_reg;
 237 
 238   Label L_fallthrough;
 239   int label_nulls = 0;
 240   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 241   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 242   assert(label_nulls <= 1, "at most one NULL in the batch");
 243 
 244   // a couple of useful fields in sub_klass:
 245   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 246   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 247   Address secondary_supers_addr(sub_klass, ss_offset);
 248   Address super_cache_addr(     sub_klass, sc_offset);
 249 
 250 #ifndef PRODUCT
 251   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 252 #endif
 253 
 254   // We will consult the secondary-super array.
 255   ldr(scan_temp, Address(sub_klass, ss_offset));
 256 
 257   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key would need to hold the compressed (narrow) super_klass
 259   Register search_key = super_klass;
 260 
 261   // Load the array length.
 262   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 263   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 264 
 265   add(count_temp, count_temp, 1);
 266 
 267   Label L_loop, L_setnz_and_fail, L_fail;
 268 
 269   // Top of search loop
 270   bind(L_loop);
 271   // Notes:
 272   //  scan_temp starts at the array elements
 273   //  count_temp is 1+size
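  //  For illustration (informal): with 3 secondary supers, count_temp starts at 4,
  //  so the subs/branch pair below performs at most 3 element loads before
  //  falling out to the failure path when no match is found.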
 274   subs(count_temp, count_temp, 1);
 275   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 276     // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
 278   } else {
 279     b(L_fail, eq); // not found in the array
 280   }
 281 
 282   // Load next super to check
  // In the array of super classes, elements are pointer-sized.
 284   int element_size = wordSize;
 285   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 286 
 287   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 288   subs(cmp_temp, cmp_temp, search_key);
 289 
 290   // A miss means we are NOT a subtype and need to keep looping
 291   b(L_loop, ne);
 292 
 293   // Falling out the bottom means we found a hit; we ARE a subtype
 294 
 295   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 296 
 297   // Success.  Cache the super we found and proceed in triumph.
 298   str(super_klass, Address(sub_klass, sc_offset));
 299 
 300   if (saved_reg != noreg) {
 301     // Return success
 302     pop(saved_reg);
 303   }
 304 
 305   b(*L_success);
 306 
 307   bind(L_fail);
 308   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 309   if (set_cond_codes) {
 310     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 311   }
 312   if (saved_reg != noreg) {
 313     pop(saved_reg);
 314   }
 315   if (L_failure != &L_fallthrough) {
 316     b(*L_failure);
 317   }
 318 
 319   bind(L_fallthrough);
 320 #endif
 321 }
 322 
 323 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 324 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 325   assert_different_registers(params_base, params_count);
 326   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 327   return Address(tmp, -Interpreter::stackElementSize);
 328 }
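// For illustration (informal): with params_count == 3 the returned address is
// params_base + 2 * Interpreter::stackElementSize, i.e. one stack element below
// params_base + params_count * Interpreter::stackElementSize.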
 329 
 330 
 331 void MacroAssembler::align(int modulus) {
 332   while (offset() % modulus != 0) {
 333     nop();
 334   }
 335 }
 336 
 337 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 338                                         Register last_java_fp,
 339                                         bool save_last_java_pc,
 340                                         Register tmp) {
 341   int pc_offset;
 342   if (last_java_fp != noreg) {
 343     // optional
 344     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 345     _fp_saved = true;
 346   } else {
 347     _fp_saved = false;
 348   }
 349   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 350 #ifdef AARCH64
 351     pc_offset = mov_pc_to(tmp);
 352     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 353 #else
 354     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 355     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 356 #endif
 357     _pc_saved = true;
 358   } else {
 359     _pc_saved = false;
 360     pc_offset = -1;
 361   }
  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that the
  // other entries are valid when SP is set.

  // However, this is probably not a strong constraint, since for instance PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.
 369 
 370   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 371   // to update oop_map offsets) and a StoreStore barrier might be needed.
 372 
 373   if (last_java_sp == noreg) {
 374     last_java_sp = SP; // always saved
 375   }
 376 #ifdef AARCH64
 377   if (last_java_sp == SP) {
 378     mov(tmp, SP);
 379     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 380   } else {
 381     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 382   }
 383 #else
 384   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 385 #endif
 386 
 387   return pc_offset; // for oopmaps
 388 }
 389 
 390 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 391   const Register Rzero = zero_register(tmp);
 392   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 393   if (_fp_saved) {
 394     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 395   }
 396   if (_pc_saved) {
 397     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 398   }
 399 }
 400 
 401 
 402 // Implementation of call_VM versions
 403 
 404 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 405   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 406   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 407 
 408 #ifndef AARCH64
 409   // Safer to save R9 here since callers may have been written
 410   // assuming R9 survives. This is suboptimal but is not worth
 411   // optimizing for the few platforms where R9 is scratched.
 412   push(RegisterSet(R4) | R9ifScratched);
 413   mov(R4, SP);
 414   bic(SP, SP, StackAlignmentInBytes - 1);
 415 #endif // AARCH64
 416   call(entry_point, relocInfo::runtime_call_type);
 417 #ifndef AARCH64
 418   mov(SP, R4);
 419   pop(RegisterSet(R4) | R9ifScratched);
 420 #endif // AARCH64
 421 }
 422 
 423 
 424 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 425   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 426   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 427 
 428   const Register tmp = Rtemp;
 429   assert_different_registers(oop_result, tmp);
 430 
 431   set_last_Java_frame(SP, FP, true, tmp);
 432 
 433 #ifdef ASSERT
 434   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 435 #endif // ASSERT
 436 
 437 #ifndef AARCH64
 438 #if R9_IS_SCRATCHED
 439   // Safer to save R9 here since callers may have been written
 440   // assuming R9 survives. This is suboptimal but is not worth
 441   // optimizing for the few platforms where R9 is scratched.
 442 
  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once SP is saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
 447   sub(SP, SP, 4);
 448   bic(SP, SP, StackAlignmentInBytes - 1);
 449   str(R9, Address(SP, 0));
 450 #else
 451   bic(SP, SP, StackAlignmentInBytes - 1);
 452 #endif // R9_IS_SCRATCHED
 453 #endif
 454 
 455   mov(R0, Rthread);
 456   call(entry_point, relocInfo::runtime_call_type);
 457 
 458 #ifndef AARCH64
 459 #if R9_IS_SCRATCHED
 460   ldr(R9, Address(SP, 0));
 461 #endif
 462   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 463 #endif
 464 
 465   reset_last_Java_frame(tmp);
 466 
 467   // C++ interp handles this in the interpreter
 468   check_and_handle_popframe();
 469   check_and_handle_earlyret();
 470 
 471   if (check_exceptions) {
 472     // check for pending exceptions
 473     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 474 #ifdef AARCH64
 475     Label L;
 476     cbz(tmp, L);
 477     mov_pc_to(Rexception_pc);
 478     b(StubRoutines::forward_exception_entry());
 479     bind(L);
 480 #else
 481     cmp(tmp, 0);
 482     mov(Rexception_pc, PC, ne);
 483     b(StubRoutines::forward_exception_entry(), ne);
 484 #endif // AARCH64
 485   }
 486 
 487   // get oop result if there is one and reset the value in the thread
 488   if (oop_result->is_valid()) {
 489     get_vm_result(oop_result, tmp);
 490   }
 491 }
 492 
 493 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 494   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 495 }
 496 
 497 
 498 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 499   assert (arg_1 == R1, "fixed register for arg_1");
 500   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 501 }
 502 
 503 
 504 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 505   assert (arg_1 == R1, "fixed register for arg_1");
 506   assert (arg_2 == R2, "fixed register for arg_2");
 507   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 508 }
 509 
 510 
 511 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 512   assert (arg_1 == R1, "fixed register for arg_1");
 513   assert (arg_2 == R2, "fixed register for arg_2");
 514   assert (arg_3 == R3, "fixed register for arg_3");
 515   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 516 }
 517 
 518 
 519 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 520   // Not used on ARM
 521   Unimplemented();
 522 }
 523 
 524 
 525 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 526   // Not used on ARM
 527   Unimplemented();
 528 }
 529 
 530 
 531 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 532 // Not used on ARM
 533   Unimplemented();
 534 }
 535 
 536 
 537 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 538   // Not used on ARM
 539   Unimplemented();
 540 }
 541 
 542 // Raw call, without saving/restoring registers, exception handling, etc.
 543 // Mainly used from various stubs.
 544 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 545   const Register tmp = Rtemp; // Rtemp free since scratched by call
 546   set_last_Java_frame(SP, FP, true, tmp);
 547 #if R9_IS_SCRATCHED
 548   if (save_R9_if_scratched) {
 549     // Note: Saving also R10 for alignment.
 550     push(RegisterSet(R9, R10));
 551   }
 552 #endif
 553   mov(R0, Rthread);
 554   call(entry_point, relocInfo::runtime_call_type);
 555 #if R9_IS_SCRATCHED
 556   if (save_R9_if_scratched) {
 557     pop(RegisterSet(R9, R10));
 558   }
 559 #endif
 560   reset_last_Java_frame(tmp);
 561 }
 562 
 563 void MacroAssembler::call_VM_leaf(address entry_point) {
 564   call_VM_leaf_helper(entry_point, 0);
 565 }
 566 
 567 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 568   assert (arg_1 == R0, "fixed register for arg_1");
 569   call_VM_leaf_helper(entry_point, 1);
 570 }
 571 
 572 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 573   assert (arg_1 == R0, "fixed register for arg_1");
 574   assert (arg_2 == R1, "fixed register for arg_2");
 575   call_VM_leaf_helper(entry_point, 2);
 576 }
 577 
 578 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 579   assert (arg_1 == R0, "fixed register for arg_1");
 580   assert (arg_2 == R1, "fixed register for arg_2");
 581   assert (arg_3 == R2, "fixed register for arg_3");
 582   call_VM_leaf_helper(entry_point, 3);
 583 }
 584 
 585 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 586   assert (arg_1 == R0, "fixed register for arg_1");
 587   assert (arg_2 == R1, "fixed register for arg_2");
 588   assert (arg_3 == R2, "fixed register for arg_3");
 589   assert (arg_4 == R3, "fixed register for arg_4");
 590   call_VM_leaf_helper(entry_point, 4);
 591 }
 592 
 593 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 594   assert_different_registers(oop_result, tmp);
 595   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 596   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 597   verify_oop(oop_result);
 598 }
 599 
 600 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 601   assert_different_registers(metadata_result, tmp);
 602   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 603   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 604 }
 605 
 606 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 607   if (arg2.is_register()) {
 608     add(dst, arg1, arg2.as_register());
 609   } else {
 610     add(dst, arg1, arg2.as_constant());
 611   }
 612 }
 613 
 614 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 615 #ifdef AARCH64
 616   if (c == 0) {
 617     if (rd != rn) {
 618       mov(rd, rn);
 619     }
 620     return;
 621   }
 622   if (c < 0) {
 623     sub_slow(rd, rn, -c);
 624     return;
 625   }
 626   if (c > right_n_bits(24)) {
 627     guarantee(rd != rn, "no large add_slow with only one register");
 628     mov_slow(rd, c);
 629     add(rd, rn, rd);
 630   } else {
 631     int lo = c & right_n_bits(12);
 632     int hi = (c >> 12) & right_n_bits(12);
 633     if (lo != 0) {
 634       add(rd, rn, lo, lsl0);
 635     }
 636     if (hi != 0) {
 637       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 638     }
 639   }
 640 #else
  // This function is used in the compiler for handling large frame offsets
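  // For illustration (informal): add_slow(rd, rn, 0x1234) splits the constant into
  // 0x234 + 0x1000, both encodable as rotated immediates, and emits
  //   add rd, rn, #0x234
  //   add rd, rd, #0x1000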
 642   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 643     return sub(rd, rn, (-c));
 644   }
 645   int low = c & 0x3fc;
 646   if (low != 0) {
 647     add(rd, rn, low);
 648     rn = rd;
 649   }
 650   if (c & ~0x3fc) {
 651     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 652     add(rd, rn, c & ~0x3fc);
 653   } else if (rd != rn) {
 654     assert(c == 0, "");
 655     mov(rd, rn); // need to generate at least one move!
 656   }
 657 #endif // AARCH64
 658 }
 659 
 660 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 661 #ifdef AARCH64
 662   if (c <= 0) {
 663     add_slow(rd, rn, -c);
 664     return;
 665   }
 666   if (c > right_n_bits(24)) {
 667     guarantee(rd != rn, "no large sub_slow with only one register");
 668     mov_slow(rd, c);
 669     sub(rd, rn, rd);
 670   } else {
 671     int lo = c & right_n_bits(12);
 672     int hi = (c >> 12) & right_n_bits(12);
 673     if (lo != 0) {
 674       sub(rd, rn, lo, lsl0);
 675     }
 676     if (hi != 0) {
 677       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 678     }
 679   }
 680 #else
  // This function is used in the compiler for handling large frame offsets
 682   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 683     return add(rd, rn, (-c));
 684   }
 685   int low = c & 0x3fc;
 686   if (low != 0) {
 687     sub(rd, rn, low);
 688     rn = rd;
 689   }
 690   if (c & ~0x3fc) {
 691     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 692     sub(rd, rn, c & ~0x3fc);
 693   } else if (rd != rn) {
 694     assert(c == 0, "");
 695     mov(rd, rn); // need to generate at least one move!
 696   }
 697 #endif // AARCH64
 698 }
 699 
 700 void MacroAssembler::mov_slow(Register rd, address addr) {
 701   // do *not* call the non relocated mov_related_address
 702   mov_slow(rd, (intptr_t)addr);
 703 }
 704 
 705 void MacroAssembler::mov_slow(Register rd, const char *str) {
 706   mov_slow(rd, (intptr_t)str);
 707 }
 708 
 709 #ifdef AARCH64
 710 
// Common code for mov_slow and instr_count_for_mov_slow.
// Returns the number of instructions in the mov_slow pattern,
// generating it if a non-null MacroAssembler is given.
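// For illustration (informal, not generated here): a constant such as
// 0x0000123400005678 has two non-zero 16-bit chunks, so the code below emits
//   movz rd, #0x5678, lsl #0
//   movk rd, #0x1234, lsl #32
// and the helper returns 2.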
 714 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
 716   // Update it at modifications.
 717 
 718   const intx mask = right_n_bits(16);
 719   // 1 movz instruction
 720   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 721     if ((c & ~(mask << base_shift)) == 0) {
 722       if (masm != NULL) {
 723         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 724       }
 725       return 1;
 726     }
 727   }
 728   // 1 movn instruction
 729   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 730     if (((~c) & ~(mask << base_shift)) == 0) {
 731       if (masm != NULL) {
 732         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 733       }
 734       return 1;
 735     }
 736   }
 737   // 1 orr instruction
 738   {
 739     LogicalImmediate imm(c, false);
 740     if (imm.is_encoded()) {
 741       if (masm != NULL) {
 742         masm->orr(rd, ZR, imm);
 743       }
 744       return 1;
 745     }
 746   }
 747   // 1 movz/movn + up to 3 movk instructions
 748   int zeroes = 0;
 749   int ones = 0;
 750   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 751     int part = (c >> base_shift) & mask;
 752     if (part == 0) {
 753       ++zeroes;
 754     } else if (part == mask) {
 755       ++ones;
 756     }
 757   }
 758   int def_bits = 0;
 759   if (ones > zeroes) {
 760     def_bits = mask;
 761   }
 762   int inst_count = 0;
 763   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 764     int part = (c >> base_shift) & mask;
 765     if (part != def_bits) {
 766       if (masm != NULL) {
 767         if (inst_count > 0) {
 768           masm->movk(rd, part, base_shift);
 769         } else {
 770           if (def_bits == 0) {
 771             masm->movz(rd, part, base_shift);
 772           } else {
 773             masm->movn(rd, ~part & mask, base_shift);
 774           }
 775         }
 776       }
 777       inst_count++;
 778     }
 779   }
 780   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 781   return inst_count;
 782 }
 783 
 784 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 785 #ifdef ASSERT
 786   int off = offset();
 787 #endif
 788   (void) mov_slow_helper(rd, c, this);
 789   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 790 }
 791 
 792 // Counts instructions generated by mov_slow(rd, c).
 793 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 794   return mov_slow_helper(noreg, c, NULL);
 795 }
 796 
 797 int MacroAssembler::instr_count_for_mov_slow(address c) {
 798   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 799 }
 800 
 801 #else
 802 
 803 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 804   if (AsmOperand::is_rotated_imm(c)) {
 805     mov(rd, c, cond);
 806   } else if (AsmOperand::is_rotated_imm(~c)) {
 807     mvn(rd, ~c, cond);
 808   } else if (VM_Version::supports_movw()) {
 809     movw(rd, c & 0xffff, cond);
 810     if ((unsigned int)c >> 16) {
 811       movt(rd, (unsigned int)c >> 16, cond);
 812     }
 813   } else {
 814     // Find first non-zero bit
 815     int shift = 0;
 816     while ((c & (3 << shift)) == 0) {
 817       shift += 2;
 818     }
 819     // Put the least significant part of the constant
 820     int mask = 0xff << shift;
 821     mov(rd, c & mask, cond);
 822     // Add up to 3 other parts of the constant;
 823     // each of them can be represented as rotated_imm
 824     if (c & (mask << 8)) {
 825       orr(rd, rd, c & (mask << 8), cond);
 826     }
 827     if (c & (mask << 16)) {
 828       orr(rd, rd, c & (mask << 16), cond);
 829     }
 830     if (c & (mask << 24)) {
 831       orr(rd, rd, c & (mask << 24), cond);
 832     }
 833   }
 834 }
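// For illustration (informal): on a core with movw/movt support, mov_slow(rd, 0x12345678)
// emits
//   movw rd, #0x5678
//   movt rd, #0x1234
// while older cores fall back to a mov of the least significant part plus up to
// three orr instructions, as in the final branch above.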
 835 
 836 #endif // AARCH64
 837 
 838 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 839 #ifdef AARCH64
 840                              bool patchable
 841 #else
 842                              AsmCondition cond
 843 #endif
 844                              ) {
 845 
 846   if (o == NULL) {
 847 #ifdef AARCH64
 848     if (patchable) {
 849       nop();
 850     }
 851     mov(rd, ZR);
 852 #else
 853     mov(rd, 0, cond);
 854 #endif
 855     return;
 856   }
 857 
 858   if (oop_index == 0) {
 859     oop_index = oop_recorder()->allocate_oop_index(o);
 860   }
 861   relocate(oop_Relocation::spec(oop_index));
 862 
 863 #ifdef AARCH64
 864   if (patchable) {
 865     nop();
 866   }
 867   ldr(rd, pc());
 868 #else
 869   if (VM_Version::supports_movw()) {
 870     movw(rd, 0, cond);
 871     movt(rd, 0, cond);
 872   } else {
 873     ldr(rd, Address(PC), cond);
 874     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 875     nop();
 876   }
 877 #endif
 878 }
 879 
 880 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 881   if (o == NULL) {
 882 #ifdef AARCH64
 883     if (patchable) {
 884       nop();
 885     }
 886 #endif
 887     mov(rd, 0);
 888     return;
 889   }
 890 
 891   if (metadata_index == 0) {
 892     metadata_index = oop_recorder()->allocate_metadata_index(o);
 893   }
 894   relocate(metadata_Relocation::spec(metadata_index));
 895 
 896 #ifdef AARCH64
 897   if (patchable) {
 898     nop();
 899   }
 900 #ifdef COMPILER2
 901   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 902     mov_slow(rd, (address)o);
 903     return;
 904   }
 905 #endif
 906   ldr(rd, pc());
 907 #else
 908   if (VM_Version::supports_movw()) {
 909     movw(rd, ((int)o) & 0xffff);
 910     movt(rd, (unsigned int)o >> 16);
 911   } else {
 912     ldr(rd, Address(PC));
 913     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 914     nop();
 915   }
 916 #endif // AARCH64
 917 }
 918 
 919 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 920   Label skip_constant;
 921   union {
 922     jfloat f;
 923     jint i;
 924   } accessor;
 925   accessor.f = c;
 926 
 927 #ifdef AARCH64
 928   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 929   Label L;
 930   ldr_s(fd, target(L));
 931   b(skip_constant);
 932   bind(L);
 933   emit_int32(accessor.i);
 934   bind(skip_constant);
 935 #else
 936   flds(fd, Address(PC), cond);
 937   b(skip_constant);
 938   emit_int32(accessor.i);
 939   bind(skip_constant);
 940 #endif // AARCH64
 941 }
 942 
 943 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 944   Label skip_constant;
 945   union {
 946     jdouble d;
 947     jint i[2];
 948   } accessor;
 949   accessor.d = c;
 950 
 951 #ifdef AARCH64
 952   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 953   Label L;
 954   ldr_d(fd, target(L));
 955   b(skip_constant);
 956   align(wordSize);
 957   bind(L);
 958   emit_int32(accessor.i[0]);
 959   emit_int32(accessor.i[1]);
 960   bind(skip_constant);
 961 #else
 962   fldd(fd, Address(PC), cond);
 963   b(skip_constant);
 964   emit_int32(accessor.i[0]);
 965   emit_int32(accessor.i[1]);
 966   bind(skip_constant);
 967 #endif // AARCH64
 968 }
 969 
 970 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 971   intptr_t addr = (intptr_t) address_of_global;
 972 #ifdef AARCH64
 973   assert((addr & 0x3) == 0, "address should be aligned");
 974 
 975   // FIXME: TODO
 976   if (false && page_reachable_from_cache(address_of_global)) {
 977     assert(false,"TODO: relocate");
 978     //relocate();
 979     adrp(reg, address_of_global);
 980     ldrsw(reg, Address(reg, addr & 0xfff));
 981   } else {
 982     mov_slow(reg, addr & ~0x3fff);
 983     ldrsw(reg, Address(reg, addr & 0x3fff));
 984   }
 985 #else
 986   mov_slow(reg, addr & ~0xfff);
 987   ldr(reg, Address(reg, addr & 0xfff));
 988 #endif
 989 }
 990 
 991 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 992 #ifdef AARCH64
 993   intptr_t addr = (intptr_t) address_of_global;
 994   assert ((addr & 0x7) == 0, "address should be aligned");
 995   mov_slow(reg, addr & ~0x7fff);
 996   ldr(reg, Address(reg, addr & 0x7fff));
 997 #else
 998   ldr_global_s32(reg, address_of_global);
 999 #endif
1000 }
1001 
1002 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
1003   intptr_t addr = (intptr_t) address_of_global;
1004   mov_slow(reg, addr & ~0xfff);
1005   ldrb(reg, Address(reg, addr & 0xfff));
1006 }
1007 
1008 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1009 #ifdef AARCH64
1010   switch (bits) {
1011     case  8: uxtb(rd, rn); break;
1012     case 16: uxth(rd, rn); break;
1013     case 32: mov_w(rd, rn); break;
1014     default: ShouldNotReachHere();
1015   }
1016 #else
1017   if (bits <= 8) {
1018     andr(rd, rn, (1 << bits) - 1);
1019   } else if (bits >= 24) {
1020     bic(rd, rn, -1 << bits);
1021   } else {
1022     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1023     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1024   }
1025 #endif
1026 }
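// For illustration (informal): on 32-bit ARM, zero_extend(rd, rn, 12) takes the
// shift-pair branch above and emits
//   mov rd, rn, lsl #20
//   mov rd, rd, lsr #20
// clearing the upper 20 bits.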
1027 
1028 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1029 #ifdef AARCH64
1030   switch (bits) {
1031     case  8: sxtb(rd, rn); break;
1032     case 16: sxth(rd, rn); break;
1033     case 32: sxtw(rd, rn); break;
1034     default: ShouldNotReachHere();
1035   }
1036 #else
1037   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1038   mov(rd, AsmOperand(rd, asr, 32 - bits));
1039 #endif
1040 }
1041 
1042 #ifndef AARCH64
1043 
1044 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1045                                Register rn_lo, Register rn_hi,
1046                                AsmCondition cond) {
1047   if (rd_lo != rn_hi) {
1048     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1049     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1050   } else if (rd_hi != rn_lo) {
1051     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1052     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1053   } else {
1054     eor(rd_lo, rd_hi, rd_lo, cond);
1055     eor(rd_hi, rd_lo, rd_hi, cond);
1056     eor(rd_lo, rd_hi, rd_lo, cond);
1057   }
1058 }
1059 
1060 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1061                                 Register rn_lo, Register rn_hi,
1062                                 AsmShift shift, Register count) {
1063   Register tmp;
1064   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1065     tmp = rd_lo;
1066   } else {
1067     tmp = rd_hi;
1068   }
1069   assert_different_registers(tmp, count, rn_lo, rn_hi);
1070 
1071   subs(tmp, count, 32);
1072   if (shift == lsl) {
1073     assert_different_registers(rd_hi, rn_lo);
1074     assert_different_registers(count, rd_hi);
1075     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1076     rsb(tmp, count, 32, mi);
1077     if (rd_hi == rn_hi) {
1078       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1079       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1080     } else {
1081       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1082       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1083     }
1084     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1085   } else {
1086     assert_different_registers(rd_lo, rn_hi);
1087     assert_different_registers(rd_lo, count);
1088     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1089     rsb(tmp, count, 32, mi);
1090     if (rd_lo == rn_lo) {
1091       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1092       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1093     } else {
1094       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1095       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1096     }
1097     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1098   }
1099 }
1100 
1101 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1102                                 Register rn_lo, Register rn_hi,
1103                                 AsmShift shift, int count) {
1104   assert(count != 0 && (count & ~63) == 0, "must be");
1105 
1106   if (shift == lsl) {
1107     assert_different_registers(rd_hi, rn_lo);
1108     if (count >= 32) {
1109       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1110       mov(rd_lo, 0);
1111     } else {
1112       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1113       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1114       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1115     }
1116   } else {
1117     assert_different_registers(rd_lo, rn_hi);
1118     if (count >= 32) {
1119       if (count == 32) {
1120         mov(rd_lo, rn_hi);
1121       } else {
1122         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1123       }
1124       if (shift == asr) {
1125         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1126       } else {
1127         mov(rd_hi, 0);
1128       }
1129     } else {
1130       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1131       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1132       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1133     }
1134   }
1135 }
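// For illustration (informal): long_shift(rd_lo, rd_hi, rn_lo, rn_hi, lsl, 40)
// takes the count >= 32 branch above and emits
//   mov rd_hi, rn_lo, lsl #8
//   mov rd_lo, #0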
1136 #endif // !AARCH64
1137 
1138 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
1140   // Update it at modifications.
1141   if (!VerifyOops) return;
1142 
1143   char buffer[64];
1144 #ifdef COMPILER1
1145   if (CommentedAssembly) {
1146     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1147     block_comment(buffer);
1148   }
1149 #endif
1150   const char* msg_buffer = NULL;
1151   {
1152     ResourceMark rm;
1153     stringStream ss;
1154     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1155     msg_buffer = code_string(ss.as_string());
1156   }
1157 
1158   save_all_registers();
1159 
1160   if (reg != R2) {
1161       mov(R2, reg);                              // oop to verify
1162   }
1163   mov(R1, SP);                                   // register save area
1164 
1165   Label done;
1166   InlinedString Lmsg(msg_buffer);
1167   ldr_literal(R0, Lmsg);                         // message
1168 
1169   // call indirectly to solve generation ordering problem
1170   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1171   call(Rtemp);
1172 
1173   restore_all_registers();
1174 
1175   b(done);
1176 #ifdef COMPILER2
1177   int off = offset();
1178 #endif
1179   bind_literal(Lmsg);
1180 #ifdef COMPILER2
1181   if (offset() - off == 1 * wordSize) {
1182     // no padding, so insert nop for worst-case sizing
1183     nop();
1184   }
1185 #endif
1186   bind(done);
1187 }
1188 
1189 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1190   if (!VerifyOops) return;
1191 
1192   const char* msg_buffer = NULL;
1193   {
1194     ResourceMark rm;
1195     stringStream ss;
1196     if ((addr.base() == SP) && (addr.index()==noreg)) {
1197       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1198     } else {
1199       ss.print("verify_oop_addr: %s", s);
1200     }
1201     ss.print(" (%s:%d)", file, line);
1202     msg_buffer = code_string(ss.as_string());
1203   }
1204 
1205   int push_size = save_all_registers();
1206 
1207   if (addr.base() == SP) {
1208     // computes an addr that takes into account the push
1209     if (addr.index() != noreg) {
1210       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1211       add(new_base, SP, push_size);
1212       addr = addr.rebase(new_base);
1213     } else {
1214       addr = addr.plus_disp(push_size);
1215     }
1216   }
1217 
1218   ldr(R2, addr);                                 // oop to verify
1219   mov(R1, SP);                                   // register save area
1220 
1221   Label done;
1222   InlinedString Lmsg(msg_buffer);
1223   ldr_literal(R0, Lmsg);                         // message
1224 
1225   // call indirectly to solve generation ordering problem
1226   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1227   call(Rtemp);
1228 
1229   restore_all_registers();
1230 
1231   b(done);
1232   bind_literal(Lmsg);
1233   bind(done);
1234 }
1235 
1236 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1237   if (needs_explicit_null_check(offset)) {
1238 #ifdef AARCH64
1239     ldr(ZR, Address(reg));
1240 #else
1241     assert_different_registers(reg, tmp);
1242     if (tmp == noreg) {
1243       tmp = Rtemp;
1244       assert((! Thread::current()->is_Compiler_thread()) ||
1245              (! (ciEnv::current()->task() == NULL)) ||
1246              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1247              "Rtemp not available in C2"); // explicit tmp register required
1248       // XXX: could we mark the code buffer as not compatible with C2 ?
1249     }
1250     ldr(tmp, Address(reg));
1251 #endif
1252   }
1253 }
1254 
1255 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1256 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1257                                  RegisterOrConstant size_expression, Label& slow_case) {
1258   if (!Universe::heap()->supports_inline_contig_alloc()) {
1259     b(slow_case);
1260     return;
1261   }
1262 
1263   CollectedHeap* ch = Universe::heap();
1264 
1265   const Register top_addr = tmp1;
1266   const Register heap_end = tmp2;
1267 
1268   if (size_expression.is_register()) {
1269     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1270   } else {
1271     assert_different_registers(obj, obj_end, top_addr, heap_end);
1272   }
1273 
1274   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1275   if (load_const) {
1276     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1277   } else {
1278     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1279   }
1280   // Calculate new heap_top by adding the size of the object
1281   Label retry;
1282   bind(retry);
1283 
1284 #ifdef AARCH64
1285   ldxr(obj, top_addr);
1286 #else
1287   ldr(obj, Address(top_addr));
1288 #endif // AARCH64
1289 
1290   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1291   add_rc(obj_end, obj, size_expression);
1292   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1293   cmp(obj_end, obj);
1294   b(slow_case, lo);
1295   // Update heap_top if allocation succeeded
1296   cmp(obj_end, heap_end);
1297   b(slow_case, hi);
1298 
1299 #ifdef AARCH64
1300   stxr(heap_end/*scratched*/, obj_end, top_addr);
1301   cbnz_w(heap_end, retry);
1302 #else
1303   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1304   b(retry, ne);
1305 #endif // AARCH64
1306 }
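// For illustration (informal; register names and size are hypothetical): a caller
// typically writes
//   eden_allocate(Robj, Robj_end, Rtmp1, Rtmp2, RegisterOrConstant(instance_size), slow_case);
// and on the fall-through path Robj/Robj_end delimit the new object, while allocation
// failure (or a heap without inline contiguous allocation) branches to slow_case.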
1307 
1308 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1309 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1310                                  RegisterOrConstant size_expression, Label& slow_case) {
1311   const Register tlab_end = tmp1;
1312   assert_different_registers(obj, obj_end, tlab_end);
1313 
1314   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1315   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1316   add_rc(obj_end, obj, size_expression);
1317   cmp(obj_end, tlab_end);
1318   b(slow_case, hi);
1319   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1320 }
1321 
// Fills the memory region [start, end) with zeroes. Clobbers `start` and `tmp` registers.
1323 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1324   Label loop;
1325   const Register ptr = start;
1326 
1327 #ifdef AARCH64
1328   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1329   const Register size = tmp;
1330   Label remaining, done;
1331 
1332   sub(size, end, start);
1333 
1334 #ifdef ASSERT
1335   { Label L;
1336     tst(size, wordSize - 1);
1337     b(L, eq);
1338     stop("size is not a multiple of wordSize");
1339     bind(L);
1340   }
1341 #endif // ASSERT
1342 
1343   subs(size, size, wordSize);
1344   b(remaining, le);
1345 
1346   // Zero by 2 words per iteration.
1347   bind(loop);
1348   subs(size, size, 2*wordSize);
1349   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1350   b(loop, gt);
1351 
1352   bind(remaining);
1353   b(done, ne);
1354   str(ZR, Address(ptr));
1355   bind(done);
1356 #else
1357   mov(tmp, 0);
1358   bind(loop);
1359   cmp(ptr, end);
1360   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1361   b(loop, lo);
1362 #endif // AARCH64
1363 }
1364 
1365 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1366 #ifdef AARCH64
1367   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1368   add_rc(tmp, tmp, size_in_bytes);
1369   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1370 #else
1371   // Bump total bytes allocated by this thread
1372   Label done;
1373 
1374   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1375   adds(tmp, tmp, size_in_bytes);
1376   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1377   b(done, cc);
1378 
  // Increment the high word and store single-copy atomically (an unlikely scenario on typical
  // embedded systems, as it means more than 4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a
  // pair could be difficult to reserve at a higher level, so this routine is ready to allocate one itself.
1382   Register low, high;
  // Select either R0/R1 or R2/R3
1384 
1385   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1386     low = R2;
1387     high  = R3;
1388   } else {
1389     low = R0;
1390     high  = R1;
1391   }
1392   push(RegisterSet(low, high));
1393 
1394   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1395   adds(low, low, size_in_bytes);
1396   adc(high, high, 0);
1397   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1398 
1399   pop(RegisterSet(low, high));
1400 
1401   bind(done);
1402 #endif // AARCH64
1403 }
1404 
1405 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1406   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1407   if (UseStackBanging) {
1408     const int page_size = os::vm_page_size();
1409 
1410     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1411     strb(R0, Address(tmp));
1412 #ifdef AARCH64
1413     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1414       sub(tmp, tmp, page_size);
1415       strb(R0, Address(tmp));
1416     }
1417 #else
1418     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1419       strb(R0, Address(tmp, -0xff0, pre_indexed));
1420     }
1421 #endif // AARCH64
1422   }
1423 }
1424 
1425 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1426   if (UseStackBanging) {
1427     Label loop;
1428 
1429     mov(tmp, SP);
1430     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1431 #ifdef AARCH64
1432     sub(tmp, tmp, Rsize);
1433     bind(loop);
1434     subs(Rsize, Rsize, os::vm_page_size());
1435     strb(ZR, Address(tmp, Rsize));
1436 #else
1437     bind(loop);
1438     subs(Rsize, Rsize, 0xff0);
1439     strb(R0, Address(tmp, -0xff0, pre_indexed));
1440 #endif // AARCH64
1441     b(loop, hi);
1442   }
1443 }
1444 
1445 void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
1447   // Update it at modifications.
1448 #ifdef COMPILER1
1449   if (CommentedAssembly) {
1450     block_comment("stop");
1451   }
1452 #endif
1453 
1454   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1455   InlinedString Lmsg(msg);
1456 
1457   // save all registers for further inspection
1458   save_all_registers();
1459 
1460   ldr_literal(R0, Lmsg);                     // message
1461   mov(R1, SP);                               // register save area
1462 
1463 #ifdef AARCH64
1464   ldr_literal(Rtemp, Ldebug);
1465   br(Rtemp);
1466 #else
1467   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1468 #endif // AARCH64
1469 
1470 #if defined(COMPILER2) && defined(AARCH64)
1471   int off = offset();
1472 #endif
1473   bind_literal(Lmsg);
1474   bind_literal(Ldebug);
1475 #if defined(COMPILER2) && defined(AARCH64)
1476   if (offset() - off == 2 * wordSize) {
1477     // no padding, so insert nop for worst-case sizing
1478     nop();
1479   }
1480 #endif
1481 }
1482 
1483 void MacroAssembler::warn(const char* msg) {
1484 #ifdef COMPILER1
1485   if (CommentedAssembly) {
1486     block_comment("warn");
1487   }
1488 #endif
1489 
1490   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1491   InlinedString Lmsg(msg);
1492   Label done;
1493 
1494   int push_size = save_caller_save_registers();
1495 
1496 #ifdef AARCH64
1497   // TODO-AARCH64 - get rid of extra debug parameters
1498   mov(R1, LR);
1499   mov(R2, FP);
1500   add(R3, SP, push_size);
1501 #endif
1502 
1503   ldr_literal(R0, Lmsg);                    // message
1504   ldr_literal(LR, Lwarn);                   // call warning
1505 
1506   call(LR);
1507 
1508   restore_caller_save_registers();
1509 
1510   b(done);
1511   bind_literal(Lmsg);
1512   bind_literal(Lwarn);
1513   bind(done);
1514 }
1515 
1516 
1517 int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
1519   // Update it at modifications.
1520 #ifdef AARCH64
1521   const Register tmp = Rtemp;
1522   raw_push(R30, ZR);
1523   for (int i = 28; i >= 0; i -= 2) {
1524       raw_push(as_Register(i), as_Register(i+1));
1525   }
1526   mov_pc_to(tmp);
1527   str(tmp, Address(SP, 31*wordSize));
1528   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1529   return 32*wordSize;
1530 #else
1531   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1532   return 15*wordSize;
1533 #endif // AARCH64
1534 }
1535 
1536 void MacroAssembler::restore_all_registers() {
1537 #ifdef AARCH64
1538   for (int i = 0; i <= 28; i += 2) {
1539     raw_pop(as_Register(i), as_Register(i+1));
1540   }
1541   raw_pop(R30, ZR);
1542 #else
1543   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1544   add(SP, SP, wordSize);                         // discard saved PC
1545 #endif // AARCH64
1546 }
1547 
1548 int MacroAssembler::save_caller_save_registers() {
1549 #ifdef AARCH64
1550   for (int i = 0; i <= 16; i += 2) {
1551     raw_push(as_Register(i), as_Register(i+1));
1552   }
1553   raw_push(R18, LR);
1554   return 20*wordSize;
1555 #else
1556 #if R9_IS_SCRATCHED
1557   // Save also R10 to preserve alignment
1558   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1559   return 8*wordSize;
1560 #else
1561   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1562   return 6*wordSize;
1563 #endif
1564 #endif // AARCH64
1565 }
1566 
1567 void MacroAssembler::restore_caller_save_registers() {
1568 #ifdef AARCH64
1569   raw_pop(R18, LR);
1570   for (int i = 16; i >= 0; i -= 2) {
1571     raw_pop(as_Register(i), as_Register(i+1));
1572   }
1573 #else
1574 #if R9_IS_SCRATCHED
1575   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1576 #else
1577   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1578 #endif
1579 #endif // AARCH64
1580 }
1581 
1582 void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
1584   JavaThread* thread = JavaThread::current();
1585   thread->set_thread_state(_thread_in_vm);
1586 
1587   if (ShowMessageBoxOnError) {
1588     ttyLocker ttyl;
1589     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1590       BytecodeCounter::print();
1591     }
1592     if (os::message_box(msg, "Execution stopped, print registers?")) {
1593 #ifdef AARCH64
1594       // saved registers: R0-R30, PC
1595       const int nregs = 32;
1596 #else
1597       // saved registers: R0-R12, LR, PC
1598       const int nregs = 15;
1599       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1600 #endif // AARCH64
1601 
1602       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1603         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1604       }
1605 
1606 #ifdef AARCH64
1607       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1608 #endif // AARCH64
1609 
1610       // derive original SP value from the address of register save area
1611       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1612     }
1613     BREAKPOINT;
1614   } else {
1615     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1616   }
1617   assert(false, "DEBUG MESSAGE: %s", msg);
1618   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1619 }
1620 
1621 void MacroAssembler::unimplemented(const char* what) {
1622   const char* buf = NULL;
1623   {
1624     ResourceMark rm;
1625     stringStream ss;
1626     ss.print("unimplemented: %s", what);
1627     buf = code_string(ss.as_string());
1628   }
1629   stop(buf);
1630 }
1631 
1632 
1633 // Implementation of FixedSizeCodeBlock
1634 
1635 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1636 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1637 }
1638 
1639 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1640   if (_enabled) {
1641     address curr_pc = _masm->pc();
1642 
1643     assert(_start < curr_pc, "invalid current pc");
1644     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1645 
1646     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1647     for (int i = 0; i < nops_count; i++) {
1648       _masm->nop();
1649     }
1650   }
1651 }
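     // A minimal usage sketch (illustrative only, not part of the emitted code):
     // the guard pads the block with nops so it always occupies exactly
     // size_in_instrs instructions, keeping downstream offsets stable. The
     // 4-instruction budget below is a made-up example.
     //   {
     //     FixedSizeCodeBlock guard(masm, 4 /* instrs */, true /* enabled */);
     //     masm->cmp(Rtemp, 0);
     //     masm->b(done, eq);
     //   } // destructor appends 2 nops to fill the 4-instruction budget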
1652 
1653 #ifdef AARCH64
1654 
1655 // Serializes memory.
1656 // The tmp register is not used on AArch64; the parameter exists solely for compatibility with 32-bit ARM
1657 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1658   if (!os::is_MP()) return;
1659 
1660   // TODO-AARCH64 investigate dsb vs dmb effects
1661   if (order_constraint == StoreStore) {
1662     dmb(DMB_st);
1663   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1664     dmb(DMB_ld);
1665   } else {
1666     dmb(DMB_all);
1667   }
1668 }
1669 
1670 #else
1671 
1672 // Serializes memory. Potentially blows flags and the tmp register.
1673 // tmp is a scratch register for the ARMv6 co-processor write op (may be noreg on other architecture versions).
1674 // preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve status flags. Optional.
1675 // load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load operation and the conditional branch. Optional.
1676 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1677                             Register tmp,
1678                             bool preserve_flags,
1679                             Register load_tgt) {
1680   if (!os::is_MP()) return;
1681 
1682   if (order_constraint == StoreStore) {
1683     dmb(DMB_st, tmp);
1684   } else if ((order_constraint & StoreLoad)  ||
1685              (order_constraint & LoadLoad)   ||
1686              (order_constraint & StoreStore) ||
1687              (load_tgt == noreg)             ||
1688              preserve_flags) {
1689     dmb(DMB_all, tmp);
1690   } else {
1691     // LoadStore: reordering of speculative stores is prohibited
1692 
1693     // By providing an ordered load target register, we avoid an extra memory load reference
1694     Label not_taken;
1695     bind(not_taken);
1696     cmp(load_tgt, load_tgt);
1697     b(not_taken, ne);
1698   }
1699 }
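     // Illustrative call sites (a sketch; the register choices are placeholders):
     //   membar(MacroAssembler::StoreStore, Rtemp);                 // dmb st
     //   membar(MacroAssembler::Membar_mask_bits(LoadStore), Rtemp,
     //          false /* preserve_flags */, Rdest);                 // dependency trick
     // Masks other than plain StoreStore that include LoadLoad/StoreLoad/StoreStore,
     // or a LoadStore request without a load target or with preserve_flags,
     // fall back to a full dmb as coded above.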
1700 
1701 #endif // AARCH64
1702 
1703 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1704 // on failure, so fall-through can only mean success.
1705 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1706 // The looping is only needed for C2, which for some reason does not retry,
1707 // while C1 and the interpreter do.
1708 // TODO: measure if it makes a difference
1709 
1710 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1711   Register base, Register tmp, Label &slow_case,
1712   bool allow_fallthrough_on_failure, bool one_shot)
1713 {
1714 
1715   bool fallthrough_is_success = false;
1716 
1717   // ARM Litmus Test example does prefetching here.
1718   // TODO: investigate if it helps performance
1719 
1720   // The last store was to the displaced header, so to prevent
1721   // reordering we must issue a StoreStore or Release barrier before
1722   // the CAS store.
1723 
1724 #ifdef AARCH64
1725 
1726   Register Rscratch = tmp;
1727   Register Roop = base;
1728   Register mark = oldval;
1729   Register Rbox = newval;
1730   Label loop;
1731 
1732   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1733 
1734   // Instead of StoreStore here, we use store-release-exclusive below
1735 
1736   bind(loop);
1737 
1738   ldaxr(tmp, base);  // acquire
1739   cmp(tmp, oldval);
1740   b(slow_case, ne);
1741   stlxr(tmp, newval, base); // release
1742   if (one_shot) {
1743     cmp_w(tmp, 0);
1744   } else {
1745     cbnz_w(tmp, loop);
1746     fallthrough_is_success = true;
1747   }
1748 
1749   // MemBarAcquireLock would normally go here, but
1750   // we already do ldaxr+stlxr above, which has
1751   // Sequential Consistency
1752 
1753 #else
1754   membar(MacroAssembler::StoreStore, noreg);
1755 
1756   if (one_shot) {
1757     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1758     cmp(tmp, oldval);
1759     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1760     cmp(tmp, 0, eq);
1761   } else {
1762     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1763   }
1764 
1765   // MemBarAcquireLock barrier
1766   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1767   // but that doesn't prevent a load or store from floating up between
1768   // the load and store in the CAS sequence, so play it safe and
1769   // do a full fence.
1770   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1771 #endif
1772   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1773     b(slow_case, ne);
1774   }
1775 }
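     // In effect (a sketch, ignoring the one_shot and fallthrough variants):
     //   do {
     //     observed = load_exclusive(&obj->mark);
     //     if (observed != oldval) goto slow_case;        // lock already held
     //   } while (!store_exclusive(&obj->mark, newval));  // retry spurious failures
     // plus the acquire/StoreStore ordering discussed in the comments above.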
1776 
1777 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1778   Register base, Register tmp, Label &slow_case,
1779   bool allow_fallthrough_on_failure, bool one_shot)
1780 {
1781 
1782   bool fallthrough_is_success = false;
1783 
1784   assert_different_registers(oldval,newval,base,tmp);
1785 
1786 #ifdef AARCH64
1787   Label loop;
1788 
1789   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1790 
1791   bind(loop);
1792   ldxr(tmp, base);
1793   cmp(tmp, oldval);
1794   b(slow_case, ne);
1795   // MemBarReleaseLock barrier
1796   stlxr(tmp, newval, base);
1797   if (one_shot) {
1798     cmp_w(tmp, 0);
1799   } else {
1800     cbnz_w(tmp, loop);
1801     fallthrough_is_success = true;
1802   }
1803 #else
1804   // MemBarReleaseLock barrier
1805   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1806   // but that doesn't prevent a load or store from floating down between
1807   // the load and store in the CAS sequence, so play it safe and
1808   // do a full fence.
1809   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1810 
1811   if (one_shot) {
1812     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1813     cmp(tmp, oldval);
1814     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1815     cmp(tmp, 0, eq);
1816   } else {
1817     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1818   }
1819 #endif
1820   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1821     b(slow_case, ne);
1822   }
1823 
1824   // ExitEnter
1825   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1826   // barrier that follows volatile store.
1827   // TODO: Should be able to remove on armv8 if volatile loads
1828   // use the load-acquire instruction.
1829   membar(StoreLoad, noreg);
1830 }
1831 
1832 #ifndef PRODUCT
1833 
1834 // Preserves flags and all registers.
1835 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1836 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1837   if (counter_addr != NULL) {
1838     InlinedAddress counter_addr_literal((address)counter_addr);
1839     Label done, retry;
1840     if (cond != al) {
1841       b(done, inverse(cond));
1842     }
1843 
1844 #ifdef AARCH64
1845     raw_push(R0, R1);
1846     raw_push(R2, ZR);
1847 
1848     ldr_literal(R0, counter_addr_literal);
1849 
1850     bind(retry);
1851     ldxr_w(R1, R0);
1852     add_w(R1, R1, 1);
1853     stxr_w(R2, R1, R0);
1854     cbnz_w(R2, retry);
1855 
1856     raw_pop(R2, ZR);
1857     raw_pop(R0, R1);
1858 #else
1859     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1860     ldr_literal(R0, counter_addr_literal);
1861 
1862     mrs(CPSR, Rtemp);
1863 
1864     bind(retry);
1865     ldr_s32(R1, Address(R0));
1866     add(R2, R1, 1);
1867     atomic_cas_bool(R1, R2, R0, 0, R3);
1868     b(retry, ne);
1869 
1870     msr(CPSR_fsxc, Rtemp);
1871 
1872     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1873 #endif // AARCH64
1874 
1875     b(done);
1876     bind_literal(counter_addr_literal);
1877 
1878     bind(done);
1879   }
1880 }
1881 
1882 #endif // !PRODUCT
1883 
1884 
1885 // Building block for the CAS cases of biased locking: performs the CAS and records statistics.
1886 // The slow_case label is used to transfer control if the CAS fails. Otherwise it leaves the condition codes set.
1887 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1888                                                  Register tmp, Label& slow_case, int* counter_addr) {
1889 
1890   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1891 #ifdef ASSERT
1892   breakpoint(ne); // Fallthrough only on success
1893 #endif
1894 #ifndef PRODUCT
1895   if (counter_addr != NULL) {
1896     cond_atomic_inc32(al, counter_addr);
1897   }
1898 #endif // !PRODUCT
1899 }
1900 
1901 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1902                                          bool swap_reg_contains_mark,
1903                                          Register tmp2,
1904                                          Label& done, Label& slow_case,
1905                                          BiasedLockingCounters* counters) {
1906   // obj_reg must be preserved (at least) if the biased locking fails
1907   // tmp_reg is a temporary register
1908   // swap_reg was used as a temporary but contained a value
1909   //   that was used afterwards in some call paths. Callers
1910   //   have been fixed so that swap_reg no longer needs to be
1911   //   saved.
1912   // Rtemp is no longer scratched
1913 
1914   assert(UseBiasedLocking, "why call this otherwise?");
1915   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1916   guarantee(swap_reg!=tmp_reg, "invariant");
1917   assert(tmp_reg != noreg, "must supply tmp_reg");
1918 
1919 #ifndef PRODUCT
1920   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1921     counters = BiasedLocking::counters();
1922   }
1923 #endif
1924 
1925   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1926   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1927 
1928   // Biased locking
1929   // See whether the lock is currently biased toward our thread and
1930   // whether the epoch is still valid
1931   // Note that the runtime guarantees sufficient alignment of JavaThread
1932   // pointers to allow age to be placed into low bits
1933   // First check to see whether biasing is even enabled for this object
1934   Label cas_label;
1935 
1936   // The null check applies to the mark loading, if we need to load it.
1937   // If the mark has already been loaded into swap_reg then the null check
1938   // has already been performed and the offset is irrelevant.
1939   int null_check_offset = offset();
1940   if (!swap_reg_contains_mark) {
1941     ldr(swap_reg, mark_addr);
1942   }
1943 
1944   // On MP platforms, loads could return 'stale' values in some cases.
1945   // That is acceptable since either the CAS or the slow-case path is taken in the worst case.
1946 
1947   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1948   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1949 
1950   b(cas_label, ne);
1951 
1952   // The bias pattern is present in the object's header. Need to check
1953   // whether the bias owner and the epoch are both still current.
1954   load_klass(tmp_reg, obj_reg);
1955   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1956   orr(tmp_reg, tmp_reg, Rthread);
1957   eor(tmp_reg, tmp_reg, swap_reg);
1958 
1959 #ifdef AARCH64
1960   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1961 #else
1962   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1963 #endif // AARCH64
1964 
1965 #ifndef PRODUCT
1966   if (counters != NULL) {
1967     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1968   }
1969 #endif // !PRODUCT
1970 
1971   b(done, eq);
1972 
1973   Label try_revoke_bias;
1974   Label try_rebias;
1975 
1976   // At this point we know that the header has the bias pattern and
1977   // that we are not the bias owner in the current epoch. We need to
1978   // figure out more details about the state of the header in order to
1979   // know what operations can be legally performed on the object's
1980   // header.
1981 
1982   // If the low three bits in the xor result aren't clear, that means
1983   // the prototype header is no longer biased and we have to revoke
1984   // the bias on this object.
1985   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1986   b(try_revoke_bias, ne);
1987 
1988   // Biasing is still enabled for this data type. See whether the
1989   // epoch of the current bias is still valid, meaning that the epoch
1990   // bits of the mark word are equal to the epoch bits of the
1991   // prototype header. (Note that the prototype header's epoch bits
1992   // only change at a safepoint.) If not, attempt to rebias the object
1993   // toward the current thread. Note that we must be absolutely sure
1994   // that the current epoch is invalid in order to do this because
1995   // otherwise the manipulations it performs on the mark word are
1996   // illegal.
1997   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1998   b(try_rebias, ne);
1999 
2000   // tmp_reg has the age, epoch and pattern bits cleared
2001   // The remaining (owner) bits are (Thread ^ current_owner)
2002 
2003   // The epoch of the current bias is still valid but we know nothing
2004   // about the owner; it might be set or it might be clear. Try to
2005   // acquire the bias of the object using an atomic operation. If this
2006   // fails we will go in to the runtime to revoke the object's bias.
2007   // Note that we first construct the presumed unbiased header so we
2008   // don't accidentally blow away another thread's valid bias.
2009 
2010   // Note that we know the owner is not ourself. Hence, success can
2011   // only happen when the owner bits are 0
2012 
2013 #ifdef AARCH64
2014   // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2015   // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
2016   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2017   andr(swap_reg, swap_reg, tmp2);
2018 #else
2019   // until the assembler can be made smarter, we need to make some assumptions about the values
2020   // so we can optimize this:
2021   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2022 
2023   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2024   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2025 #endif // AARCH64
2026 
2027   orr(tmp_reg, swap_reg, Rthread); // new mark
2028 
2029   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2030         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2031 
2032   // If the biasing toward our thread failed, this means that
2033   // another thread succeeded in biasing it toward itself and we
2034   // need to revoke that bias. The revocation will occur in the
2035   // interpreter runtime in the slow case.
2036 
2037   b(done);
2038 
2039   bind(try_rebias);
2040 
2041   // At this point we know the epoch has expired, meaning that the
2042   // current "bias owner", if any, is actually invalid. Under these
2043   // circumstances _only_, we are allowed to use the current header's
2044   // value as the comparison value when doing the cas to acquire the
2045   // bias in the current epoch. In other words, we allow transfer of
2046   // the bias from one thread to another directly in this situation.
2047 
2048   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2049 
2050   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2051 
2052   // owner bits 'random'. Set them to Rthread.
2053 #ifdef AARCH64
2054   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2055   andr(tmp_reg, tmp_reg, tmp2);
2056 #else
2057   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2058   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2059 #endif // AARCH64
2060 
2061   orr(tmp_reg, tmp_reg, Rthread); // new mark
2062 
2063   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2064         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2065 
2066   // If the biasing toward our thread failed, then another thread
2067   // succeeded in biasing it toward itself and we need to revoke that
2068   // bias. The revocation will occur in the runtime in the slow case.
2069 
2070   b(done);
2071 
2072   bind(try_revoke_bias);
2073 
2074   // The prototype mark in the klass doesn't have the bias bit set any
2075   // more, indicating that objects of this data type are not supposed
2076   // to be biased any more. We are going to try to reset the mark of
2077   // this object to the prototype value and fall through to the
2078   // CAS-based locking scheme. Note that if our CAS fails, it means
2079   // that another thread raced us for the privilege of revoking the
2080   // bias of this particular object, so it's okay to continue in the
2081   // normal locking code.
2082 
2083   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2084 
2085   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2086 
2087   // owner bits 'random'. Clear them
2088 #ifdef AARCH64
2089   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2090   andr(tmp_reg, tmp_reg, tmp2);
2091 #else
2092   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2093   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2094 #endif // AARCH64
2095 
2096   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2097         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2098 
2099   // Fall through to the normal CAS-based lock, because no matter what
2100   // the result of the above CAS, some thread must have succeeded in
2101   // removing the bias bit from the object's header.
2102 
2103   bind(cas_label);
2104 
2105   return null_check_offset;
2106 }
2107 
2108 
2109 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2110   assert(UseBiasedLocking, "why call this otherwise?");
2111 
2112   // Check for biased locking unlock case, which is a no-op
2113   // Note: we do not have to check the thread ID for two reasons.
2114   // First, the interpreter checks for IllegalMonitorStateException at
2115   // a higher level. Second, if the bias was revoked while we held the
2116   // lock, the object could not be rebiased toward another thread, so
2117   // the bias bit would be clear.
2118   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2119 
2120   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2121   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2122   b(done, eq);
2123 }
2124 
2125 
2126 void MacroAssembler::resolve_jobject(Register value,
2127                                      Register tmp1,
2128                                      Register tmp2) {
2129   assert_different_registers(value, tmp1, tmp2);
2130   Label done, not_weak;
2131   cbz(value, done);             // Use NULL as-is.
2132   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2133   tbz(value, 0, not_weak);      // Test for jweak tag.
2134   // Resolve jweak.
2135   ldr(value, Address(value, -JNIHandles::weak_tag_value));
2136   verify_oop(value);
2137 #if INCLUDE_G1GC
2138   if (UseG1GC) {
2139     g1_write_barrier_pre(noreg, // store_addr
2140                          noreg, // new_val
2141                          value, // pre_val
2142                          tmp1,  // tmp1
2143                          tmp2); // tmp2
2144     }
2145 #endif // INCLUDE_G1GC
2146   b(done);
2147   bind(not_weak);
2148   // Resolve (untagged) jobject.
2149   ldr(value, Address(value));
2150   verify_oop(value);
2151   bind(done);
2152 }
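     // Tag handling above, as a C-like sketch mirroring JNIHandles (field and
     // constant names abbreviated; not emitted code):
     //   if (handle == NULL)            return NULL;                        // as-is
     //   if (handle & weak_tag_mask)    oop = *(handle - weak_tag_value);   // jweak
     //   else                           oop = *handle;                      // jobject
     // with a G1 pre-barrier on the jweak path so the referent is recorded for SATB.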
2153 
2154 
2155 //////////////////////////////////////////////////////////////////////////////////
2156 
2157 #if INCLUDE_G1GC
2158 
2159 // G1 pre-barrier.
2160 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2161 // If store_addr != noreg, then previous value is loaded from [store_addr];
2162 // in such case store_addr and new_val registers are preserved;
2163 // otherwise pre_val register is preserved.
2164 void MacroAssembler::g1_write_barrier_pre(Register store_addr,
2165                                           Register new_val,
2166                                           Register pre_val,
2167                                           Register tmp1,
2168                                           Register tmp2) {
2169   Label done;
2170   Label runtime;
2171 
2172   if (store_addr != noreg) {
2173     assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
2174   } else {
2175     assert (new_val == noreg, "should be");
2176     assert_different_registers(pre_val, tmp1, tmp2, noreg);
2177   }
2178 
2179   Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
2180   Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
2181   Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
2182 
2183   // Is marking active?
2184   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
2185   ldrb(tmp1, in_progress);
2186   cbz(tmp1, done);
2187 
2188   // Do we need to load the previous value?
2189   if (store_addr != noreg) {
2190     load_heap_oop(pre_val, Address(store_addr, 0));
2191   }
2192 
2193   // Is the previous value null?
2194   cbz(pre_val, done);
2195 
2196   // Can we store original value in the thread's buffer?
2197   // Is index == 0?
2198   // (The index field is typed as size_t.)
2199 
2200   ldr(tmp1, index);           // tmp1 := *index_adr
2201   ldr(tmp2, buffer);
2202 
2203   subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
2204   b(runtime, lt);             // If negative, goto runtime
2205 
2206   str(tmp1, index);           // *index_adr := tmp1
2207 
2208   // Record the previous value
2209   str(pre_val, Address(tmp2, tmp1));
2210   b(done);
2211 
2212   bind(runtime);
2213 
2214   // save the live input values
2215 #ifdef AARCH64
2216   if (store_addr != noreg) {
2217     raw_push(store_addr, new_val);
2218   } else {
2219     raw_push(pre_val, ZR);
2220   }
2221 #else
2222   if (store_addr != noreg) {
2223     // avoid raw_push to support any ordering of store_addr and new_val
2224     push(RegisterSet(store_addr) | RegisterSet(new_val));
2225   } else {
2226     push(pre_val);
2227   }
2228 #endif // AARCH64
2229 
2230   if (pre_val != R0) {
2231     mov(R0, pre_val);
2232   }
2233   mov(R1, Rthread);
2234 
2235   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
2236 
2237 #ifdef AARCH64
2238   if (store_addr != noreg) {
2239     raw_pop(store_addr, new_val);
2240   } else {
2241     raw_pop(pre_val, ZR);
2242   }
2243 #else
2244   if (store_addr != noreg) {
2245     pop(RegisterSet(store_addr) | RegisterSet(new_val));
2246   } else {
2247     pop(pre_val);
2248   }
2249 #endif // AARCH64
2250 
2251   bind(done);
2252 }
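     // Shape of the SATB enqueue emitted above (C-like sketch; names abbreviated):
     //   if (thread->satb_queue_active) {
     //     if (store_addr != noreg) pre_val = *store_addr;
     //     if (pre_val != NULL) {
     //       if (index == 0) runtime(pre_val, thread);               // buffer full
     //       else { index -= wordSize; *(buffer + index) = pre_val; }
     //     }
     //   }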
2253 
2254 // G1 post-barrier.
2255 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2256 void MacroAssembler::g1_write_barrier_post(Register store_addr,
2257                                            Register new_val,
2258                                            Register tmp1,
2259                                            Register tmp2,
2260                                            Register tmp3) {
2261 
2262   Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
2263   Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
2264 
2265   BarrierSet* bs = BarrierSet::barrier_set();
2266   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
2267   CardTable* ct = ctbs->card_table();
2268   Label done;
2269   Label runtime;
2270 
2271   // Does store cross heap regions?
2272 
2273   eor(tmp1, store_addr, new_val);
2274 #ifdef AARCH64
2275   logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
2276   cbz(tmp1, done);
2277 #else
2278   movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
2279   b(done, eq);
2280 #endif
2281 
2282   // crosses regions, storing NULL?
2283 
2284   cbz(new_val, done);
2285 
2286   // storing region crossing non-NULL, is card already dirty?
2287   const Register card_addr = tmp1;
2288   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
2289 
2290   mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
2291   add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));
2292 
2293   ldrb(tmp2, Address(card_addr));
2294   cmp(tmp2, (int)G1CardTable::g1_young_card_val());
2295   b(done, eq);
2296 
2297   membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
2298 
2299   assert(CardTable::dirty_card_val() == 0, "adjust this code");
2300   ldrb(tmp2, Address(card_addr));
2301   cbz(tmp2, done);
2302 
2303   // storing a region crossing, non-NULL oop, card is clean.
2304   // dirty card and log.
2305 
2306   strb(zero_register(tmp2), Address(card_addr));
2307 
2308   ldr(tmp2, queue_index);
2309   ldr(tmp3, buffer);
2310 
2311   subs(tmp2, tmp2, wordSize);
2312   b(runtime, lt); // go to runtime if now negative
2313 
2314   str(tmp2, queue_index);
2315 
2316   str(card_addr, Address(tmp3, tmp2));
2317   b(done);
2318 
2319   bind(runtime);
2320 
2321   if (card_addr != R0) {
2322     mov(R0, card_addr);
2323   }
2324   mov(R1, Rthread);
2325   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
2326 
2327   bind(done);
2328 }
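     // Shape of the post-barrier above (C-like sketch; not emitted code):
     //   if (((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0) return; // same region
     //   if (new_val == NULL) return;
     //   card = byte_map_base + (store_addr >> card_shift);
     //   if (*card == g1_young_card_val) return;
     //   StoreLoad barrier;
     //   if (*card == dirty_card_val) return;
     //   *card = dirty_card_val;                                      // 0, see assert
     //   enqueue card in the dirty card queue, or call the runtime if it is full.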
2329 
2330 #endif // INCLUDE_G1GC
2331 
2332 //////////////////////////////////////////////////////////////////////////////////
2333 
2334 #ifdef AARCH64
2335 
2336 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2337   switch (size_in_bytes) {
2338     case  8: ldr(dst, src); break;
2339     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2340     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2341     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2342     default: ShouldNotReachHere();
2343   }
2344 }
2345 
2346 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2347   switch (size_in_bytes) {
2348     case  8: str(src, dst);    break;
2349     case  4: str_32(src, dst); break;
2350     case  2: strh(src, dst);   break;
2351     case  1: strb(src, dst);   break;
2352     default: ShouldNotReachHere();
2353   }
2354 }
2355 
2356 #else
2357 
2358 void MacroAssembler::load_sized_value(Register dst, Address src,
2359                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2360   switch (size_in_bytes) {
2361     case  4: ldr(dst, src, cond); break;
2362     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2363     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2364     default: ShouldNotReachHere();
2365   }
2366 }
2367 
2368 
2369 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2370   switch (size_in_bytes) {
2371     case  4: str(src, dst, cond); break;
2372     case  2: strh(src, dst, cond);   break;
2373     case  1: strb(src, dst, cond);   break;
2374     default: ShouldNotReachHere();
2375   }
2376 }
2377 #endif // AARCH64
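     // Example uses (illustrative; sizes and registers are arbitrary, and the
     // trailing condition argument exists only in the 32-bit variant):
     //   load_sized_value(R0, Address(R1), 2, true);    // sign-extending 16-bit load
     //   store_sized_value(R0, Address(R1), 1);         // 8-bit store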
2378 
2379 // Look up the method for a megamorphic invokeinterface call.
2380 // The target method is determined by <Rinterf, Rindex>.
2381 // The receiver klass is in Rklass.
2382 // On success, the result will be in method_result, and execution falls through.
2383 // On failure, execution transfers to the given label.
2384 void MacroAssembler::lookup_interface_method(Register Rklass,
2385                                              Register Rintf,
2386                                              RegisterOrConstant itable_index,
2387                                              Register method_result,
2388                                              Register Rscan,
2389                                              Register Rtmp,
2390                                              Label& L_no_such_interface) {
2391 
2392   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2393 
2394   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2395   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2396 
2397   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2398   const int base = in_bytes(Klass::vtable_start_offset());
2399   const int scale = exact_log2(vtableEntry::size_in_bytes());
2400   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2401   add(Rscan, Rklass, base);
2402   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2403 
2404   // Search through the itable for an interface equal to incoming Rintf
2405   // itable looks like [intface][offset][intface][offset][intface][offset]
2406 
2407   Label loop;
2408   bind(loop);
2409   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2410 #ifdef AARCH64
2411   Label found;
2412   cmp(Rtmp, Rintf);
2413   b(found, eq);
2414   cbnz(Rtmp, loop);
2415 #else
2416   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2417   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2418   b(loop, ne);
2419 #endif // AARCH64
2420 
2421 #ifdef AARCH64
2422   b(L_no_such_interface);
2423   bind(found);
2424 #else
2425   // CF == 0 means we reached the end of the itable without finding the interface (Rintf)
2426   b(L_no_such_interface, cc);
2427 #endif // !AARCH64
2428 
2429   if (method_result != noreg) {
2430     // Interface found at previous position of Rscan, now load the method
2431     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2432     if (itable_index.is_register()) {
2433       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2434       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2435       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2436       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2437     } else {
2438       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2439                           itableMethodEntry::method_offset_in_bytes();
2440       add_slow(method_result, Rklass, method_offset);
2441       ldr(method_result, Address(method_result, Rtmp));
2442     }
2443   }
2444 }
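     // Layout being scanned (for reference): the itable begins right after the
     // vtable and holds (interface, offset) pairs terminated by a NULL interface;
     // the offset locates that interface's method table inside the receiver klass.
     // Roughly:
     //   for (e = itable_start; ; e += entry_size) {
     //     if (e->interface == Rintf) break;
     //     if (e->interface == NULL)  goto L_no_such_interface;
     //   }
     //   method_result = *(Rklass + e->offset + itable_index * wordSize);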
2445 
2446 #ifdef COMPILER2
2447 // TODO: 8 bytes at a time? pre-fetch?
2448 // Compare char[] arrays aligned to 4 bytes.
2449 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2450                                         Register limit, Register result,
2451                                       Register chr1, Register chr2, Label& Ldone) {
2452   Label Lvector, Lloop;
2453 
2454   // Note: limit contains number of bytes (2*char_elements) != 0.
2455   tst(limit, 0x2); // trailing character ?
2456   b(Lvector, eq);
2457 
2458   // compare the trailing char
2459   sub(limit, limit, sizeof(jchar));
2460   ldrh(chr1, Address(ary1, limit));
2461   ldrh(chr2, Address(ary2, limit));
2462   cmp(chr1, chr2);
2463   mov(result, 0, ne);     // not equal
2464   b(Ldone, ne);
2465 
2466   // only one char ?
2467   tst(limit, limit);
2468   mov(result, 1, eq);
2469   b(Ldone, eq);
2470 
2471   // word by word compare, don't need alignment check
2472   bind(Lvector);
2473 
2474   // Shift ary1 and ary2 to the end of the arrays, negate limit
2475   add(ary1, limit, ary1);
2476   add(ary2, limit, ary2);
2477   neg(limit, limit);
2478 
2479   bind(Lloop);
2480   ldr_u32(chr1, Address(ary1, limit));
2481   ldr_u32(chr2, Address(ary2, limit));
2482   cmp_32(chr1, chr2);
2483   mov(result, 0, ne);     // not equal
2484   b(Ldone, ne);
2485   adds(limit, limit, 2*sizeof(jchar));
2486   b(Lloop, ne);
2487 
2488   // Caller should set it:
2489   // mov(result_reg, 1);  //equal
2490 }
2491 #endif
2492 
2493 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2494   mov_slow(tmpreg1, counter_addr);
2495   ldr_s32(tmpreg2, tmpreg1);
2496   add_32(tmpreg2, tmpreg2, 1);
2497   str_32(tmpreg2, tmpreg1);
2498 }
2499 
2500 void MacroAssembler::floating_cmp(Register dst) {
2501 #ifdef AARCH64
2502   NOT_TESTED();
2503   cset(dst, gt);            // 1 if '>', else 0
2504   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2505 #else
2506   vmrs(dst, FPSCR);
2507   orr(dst, dst, 0x08000000);
2508   eor(dst, dst, AsmOperand(dst, lsl, 3));
2509   mov(dst, AsmOperand(dst, asr, 30));
2510 #endif
2511 }
2512 
2513 void MacroAssembler::restore_default_fp_mode() {
2514 #ifdef AARCH64
2515   msr(SysReg_FPCR, ZR);
2516 #else
2517 #ifndef __SOFTFP__
2518   // Round to Near mode, IEEE compatible, masked exceptions
2519   mov(Rtemp, 0);
2520   vmsr(FPSCR, Rtemp);
2521 #endif // !__SOFTFP__
2522 #endif // AARCH64
2523 }
2524 
2525 #ifndef AARCH64
2526 // 24-bit word range == 26-bit byte range
2527 bool check26(int offset) {
2528   // this could be simplified, but it mimics encoding and decoding
2529   // an actual branch instruction
2530   int off1 = offset << 6 >> 8;
2531   int encoded = off1 & ((1<<24)-1);
2532   int decoded = encoded << 8 >> 6;
2533   return offset == decoded;
2534 }
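     // Worked example (the ARM branch range is +/-32MB): check26(0x02000000)
     // is false because +32MB does not survive the truncation to a signed
     // 24-bit word offset, while check26(0x01fffffc) is true.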
2535 #endif // !AARCH64
2536 
2537 // Perform some slight adjustments so the default 32MB code cache
2538 // is fully reachable.
2539 static inline address first_cache_address() {
2540   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2541 }
2542 static inline address last_cache_address() {
2543   return CodeCache::high_bound() - Assembler::InstructionSize;
2544 }
2545 
2546 #ifdef AARCH64
2547 // Can we reach target using ADRP?
2548 bool MacroAssembler::page_reachable_from_cache(address target) {
2549   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2550   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2551   intptr_t addr = (intptr_t)target & ~0xfff;
2552 
2553   intptr_t loffset = addr - cl;
2554   intptr_t hoffset = addr - ch;
2555   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2556 }
2557 #endif
2558 
2559 // Can we reach target using unconditional branch or call from anywhere
2560 // in the code cache (because code can be relocated)?
2561 bool MacroAssembler::_reachable_from_cache(address target) {
2562 #ifdef __thumb__
2563   if ((1 & (intptr_t)target) != 0) {
2564     // Return false to avoid 'b' if we would need to switch to THUMB mode.
2565     return false;
2566   }
2567 #endif
2568 
2569   address cl = first_cache_address();
2570   address ch = last_cache_address();
2571 
2572   if (ForceUnreachable) {
2573     // Only addresses from CodeCache can be treated as reachable.
2574     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2575       return false;
2576     }
2577   }
2578 
2579   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2580   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2581 
2582 #ifdef AARCH64
2583   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2584 #else
2585   return check26(loffset - 8) && check26(hoffset - 8);
2586 #endif
2587 }
2588 
2589 bool MacroAssembler::reachable_from_cache(address target) {
2590   assert(CodeCache::contains(pc()), "not supported");
2591   return _reachable_from_cache(target);
2592 }
2593 
2594 // Can we reach the entire code cache from anywhere else in the code cache?
2595 bool MacroAssembler::_cache_fully_reachable() {
2596   address cl = first_cache_address();
2597   address ch = last_cache_address();
2598   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2599 }
2600 
2601 bool MacroAssembler::cache_fully_reachable() {
2602   assert(CodeCache::contains(pc()), "not supported");
2603   return _cache_fully_reachable();
2604 }
2605 
2606 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2607   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2608   if (reachable_from_cache(target)) {
2609     relocate(rtype);
2610     b(target NOT_AARCH64_ARG(cond));
2611     return;
2612   }
2613 
2614   // Note: relocate is not needed for the code below,
2615   // encoding targets in absolute format.
2616   if (ignore_non_patchable_relocations()) {
2617     rtype = relocInfo::none;
2618   }
2619 
2620 #ifdef AARCH64
2621   assert (scratch != noreg, "should be specified");
2622   InlinedAddress address_literal(target, rtype);
2623   ldr_literal(scratch, address_literal);
2624   br(scratch);
2625   int off = offset();
2626   bind_literal(address_literal);
2627 #ifdef COMPILER2
2628   if (offset() - off == wordSize) {
2629     // no padding, so insert nop for worst-case sizing
2630     nop();
2631   }
2632 #endif
2633 #else
2634   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2635     // Note: this version cannot be (atomically) patched
2636     mov_slow(scratch, (intptr_t)target, cond);
2637     bx(scratch, cond);
2638   } else {
2639     Label skip;
2640     InlinedAddress address_literal(target);
2641     if (cond != al) {
2642       b(skip, inverse(cond));
2643     }
2644     relocate(rtype);
2645     ldr_literal(PC, address_literal);
2646     bind_literal(address_literal);
2647     bind(skip);
2648   }
2649 #endif // AARCH64
2650 }
2651 
2652 // Similar to jump except that:
2653 // - near branches are used only if every possible destination in the code cache is near
2654 // - no movt/movw (not atomically patchable)
2655 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2656   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2657   if (cache_fully_reachable()) {
2658     // Note: this assumes that all possible targets (the initial one
2659     // and the addresses patched to) are all in the code cache.
2660     assert(CodeCache::contains(target), "target might be too far");
2661     relocate(rtype);
2662     b(target NOT_AARCH64_ARG(cond));
2663     return;
2664   }
2665 
2666   // Discard the relocation information if not needed for CacheCompiledCode
2667   // since the next encodings are all in absolute format.
2668   if (ignore_non_patchable_relocations()) {
2669     rtype = relocInfo::none;
2670   }
2671 
2672 #ifdef AARCH64
2673   assert (scratch != noreg, "should be specified");
2674   InlinedAddress address_literal(target);
2675   relocate(rtype);
2676   ldr_literal(scratch, address_literal);
2677   br(scratch);
2678   int off = offset();
2679   bind_literal(address_literal);
2680 #ifdef COMPILER2
2681   if (offset() - off == wordSize) {
2682     // no padding, so insert nop for worst-case sizing
2683     nop();
2684   }
2685 #endif
2686 #else
2687   {
2688     Label skip;
2689     InlinedAddress address_literal(target);
2690     if (cond != al) {
2691       b(skip, inverse(cond));
2692     }
2693     relocate(rtype);
2694     ldr_literal(PC, address_literal);
2695     bind_literal(address_literal);
2696     bind(skip);
2697   }
2698 #endif // AARCH64
2699 }
2700 
2701 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2702   Register scratch = LR;
2703   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2704   if (reachable_from_cache(target)) {
2705     relocate(rspec);
2706     bl(target NOT_AARCH64_ARG(cond));
2707     return;
2708   }
2709 
2710   // Note: relocate is not needed for the code below,
2711   // encoding targets in absolute format.
2712   if (ignore_non_patchable_relocations()) {
2713     // This assumes the information was needed only for relocating the code.
2714     rspec = RelocationHolder::none;
2715   }
2716 
2717 #ifndef AARCH64
2718   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2719     // Note: this version cannot be (atomically) patched
2720     mov_slow(scratch, (intptr_t)target, cond);
2721     blx(scratch, cond);
2722     return;
2723   }
2724 #endif
2725 
2726   {
2727     Label ret_addr;
2728 #ifndef AARCH64
2729     if (cond != al) {
2730       b(ret_addr, inverse(cond));
2731     }
2732 #endif
2733 
2734 
2735 #ifdef AARCH64
2736     // TODO-AARCH64: make a more optimal implementation
2737     // [ Keep in sync with MacroAssembler::call_size ]
2738     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2739     mov_slow(scratch, target);
2740     blr(scratch);
2741 #else
2742     InlinedAddress address_literal(target);
2743     relocate(rspec);
2744     adr(LR, ret_addr);
2745     ldr_literal(PC, address_literal);
2746 
2747     bind_literal(address_literal);
2748     bind(ret_addr);
2749 #endif
2750   }
2751 }
2752 
2753 #if defined(AARCH64) && defined(COMPILER2)
2754 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2755   // FIXME: mov_slow is variable-length
2756   if (!far) return 1; // bl
2757   if (patchable) return 2;  // ldr; blr
2758   return instr_count_for_mov_slow((intptr_t)target) + 1;
2759 }
2760 #endif
2761 
2762 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2763   assert(rspec.type() == relocInfo::static_call_type ||
2764          rspec.type() == relocInfo::none ||
2765          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2766 
2767   // Always generate the relocation information, needed for patching
2768   relocate(rspec); // used by NativeCall::is_call_before()
2769   if (cache_fully_reachable()) {
2770     // Note: this assumes that all possible targets (the initial one
2771     // and the addresses patched to) are all in the code cache.
2772     assert(CodeCache::contains(target), "target might be too far");
2773     bl(target);
2774   } else {
2775 #if defined(AARCH64) && defined(COMPILER2)
2776     if (c2) {
2777       // return address needs to match call_size().
2778       // no need to trash Rtemp
2779       int off = offset();
2780       Label skip_literal;
2781       InlinedAddress address_literal(target);
2782       ldr_literal(LR, address_literal);
2783       blr(LR);
2784       int ret_addr_offset = offset();
2785       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2786       b(skip_literal);
2787       int off2 = offset();
2788       bind_literal(address_literal);
2789       if (offset() - off2 == wordSize) {
2790         // no padding, so insert nop for worst-case sizing
2791         nop();
2792       }
2793       bind(skip_literal);
2794       return ret_addr_offset;
2795     }
2796 #endif
2797     Label ret_addr;
2798     InlinedAddress address_literal(target);
2799 #ifdef AARCH64
2800     ldr_literal(Rtemp, address_literal);
2801     adr(LR, ret_addr);
2802     br(Rtemp);
2803 #else
2804     adr(LR, ret_addr);
2805     ldr_literal(PC, address_literal);
2806 #endif
2807     bind_literal(address_literal);
2808     bind(ret_addr);
2809   }
2810   return offset();
2811 }
2812 
2813 // ((OopHandle)result).resolve();
2814 void MacroAssembler::resolve_oop_handle(Register result) {
2815   // OopHandle::resolve is an indirection.
2816   ldr(result, Address(result, 0));
2817 }
2818 
2819 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2820   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2821   ldr(tmp, Address(method, Method::const_offset()));
2822   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2823   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2824   ldr(mirror, Address(tmp, mirror_offset));
2825   resolve_oop_handle(mirror);
2826 }
2827 
2828 
2829 ///////////////////////////////////////////////////////////////////////////////
2830 
2831 // Compressed pointers
2832 
2833 #ifdef AARCH64
2834 
2835 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2836   if (UseCompressedClassPointers) {
2837     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2838     decode_klass_not_null(dst_klass);
2839   } else {
2840     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2841   }
2842 }
2843 
2844 #else
2845 
2846 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2847   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2848 }
2849 
2850 #endif // AARCH64
2851 
2852 // Blows src_klass.
2853 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2854 #ifdef AARCH64
2855   if (UseCompressedClassPointers) {
2856     assert(src_klass != dst_oop, "not enough registers");
2857     encode_klass_not_null(src_klass);
2858     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2859     return;
2860   }
2861 #endif // AARCH64
2862   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2863 }
2864 
2865 #ifdef AARCH64
2866 
2867 void MacroAssembler::store_klass_gap(Register dst) {
2868   if (UseCompressedClassPointers) {
2869     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2870   }
2871 }
2872 
2873 #endif // AARCH64
2874 
2875 
2876 void MacroAssembler::load_heap_oop(Register dst, Address src) {
2877 #ifdef AARCH64
2878   if (UseCompressedOops) {
2879     ldr_w(dst, src);
2880     decode_heap_oop(dst);
2881     return;
2882   }
2883 #endif // AARCH64
2884   ldr(dst, src);
2885 }
2886 
2887 // Blows src and flags.
2888 void MacroAssembler::store_heap_oop(Register src, Address dst) {
2889 #ifdef AARCH64
2890   if (UseCompressedOops) {
2891     assert(!dst.uses(src), "not enough registers");
2892     encode_heap_oop(src);
2893     str_w(src, dst);
2894     return;
2895   }
2896 #endif // AARCH64
2897   str(src, dst);
2898 }
2899 
2900 void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
2901 #ifdef AARCH64
2902   if (UseCompressedOops) {
2903     str_w(src, dst);
2904     return;
2905   }
2906 #endif // AARCH64
2907   str(src, dst);
2908 }
2909 
2910 
2911 #ifdef AARCH64
2912 
2913 // Algorithm must match oop.inline.hpp encode_heap_oop.
2914 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2915   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2916   // Keep that check in sync when modifying this code.
2917   assert (UseCompressedOops, "must be compressed");
2918   assert (Universe::heap() != NULL, "java heap should be initialized");
2919 #ifdef ASSERT
2920   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2921 #endif
2922   verify_oop(src);
2923   if (Universe::narrow_oop_base() == NULL) {
2924     if (Universe::narrow_oop_shift() != 0) {
2925       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2926       _lsr(dst, src, Universe::narrow_oop_shift());
2927     } else if (dst != src) {
2928       mov(dst, src);
2929     }
2930   } else {
2931     tst(src, src);
2932     csel(dst, Rheap_base, src, eq);
2933     sub(dst, dst, Rheap_base);
2934     if (Universe::narrow_oop_shift() != 0) {
2935       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2936       _lsr(dst, dst, Universe::narrow_oop_shift());
2937     }
2938   }
2939 }
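     // Numeric sketch of the encoding (assuming a non-NULL narrow oop base and
     // a 3-bit shift; the addresses are made up):
     //   oop = 0x0000000800000040, base = 0x0000000800000000
     //   narrow_oop = (oop - base) >> 3 = 0x8
     // A NULL oop is first redirected to the base by the csel, so it encodes to 0.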
2940 
2941 // Same algorithm as oop.inline.hpp decode_heap_oop.
2942 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2943 #ifdef ASSERT
2944   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2945 #endif
2946   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2947   if (Universe::narrow_oop_base() != NULL) {
2948     tst(src, src);
2949     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2950     csel(dst, dst, ZR, ne);
2951   } else {
2952     _lsl(dst, src, Universe::narrow_oop_shift());
2953   }
2954   verify_oop(dst);
2955 }
2956 
2957 #ifdef COMPILER2
2958 // Algorithm must match oop.inline.hpp encode_heap_oop.
2959 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2960 // must be changed.
2961 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2962   assert (UseCompressedOops, "must be compressed");
2963   assert (Universe::heap() != NULL, "java heap should be initialized");
2964 #ifdef ASSERT
2965   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2966 #endif
2967   verify_oop(src);
2968   if (Universe::narrow_oop_base() == NULL) {
2969     if (Universe::narrow_oop_shift() != 0) {
2970       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2971       _lsr(dst, src, Universe::narrow_oop_shift());
2972     } else if (dst != src) {
2973           mov(dst, src);
2974     }
2975   } else {
2976     sub(dst, src, Rheap_base);
2977     if (Universe::narrow_oop_shift() != 0) {
2978       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2979       _lsr(dst, dst, Universe::narrow_oop_shift());
2980     }
2981   }
2982 }
2983 
2984 // Same algorithm as oop.inline.hpp decode_heap_oop.
2985 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2986 // must be changed.
2987 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2988 #ifdef ASSERT
2989   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2990 #endif
2991   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2992   if (Universe::narrow_oop_base() != NULL) {
2993     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2994   } else {
2995     _lsl(dst, src, Universe::narrow_oop_shift());
2996   }
2997   verify_oop(dst);
2998 }
2999 
3000 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3001   assert(UseCompressedClassPointers, "should only be used for compressed header");
3002   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3003   int klass_index = oop_recorder()->find_index(k);
3004   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
3005 
3006   // Relocation with special format (see relocInfo_arm.hpp).
3007   relocate(rspec);
3008   narrowKlass encoded_k = Klass::encode_klass(k);
3009   movz(dst, encoded_k & 0xffff, 0);
3010   movk(dst, (encoded_k >> 16) & 0xffff, 16);
3011 }
3012 
3013 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3014   assert(UseCompressedOops, "should only be used for compressed header");
3015   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3016   int oop_index = oop_recorder()->find_index(obj);
3017   RelocationHolder rspec = oop_Relocation::spec(oop_index);
3018 
3019   relocate(rspec);
3020   movz(dst, 0xffff, 0);
3021   movk(dst, 0xffff, 16);
3022 }
3023 
3024 #endif // COMPILER2
3025 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3026 // must be changed.
3027 void MacroAssembler::encode_klass_not_null(Register r) {
3028   if (Universe::narrow_klass_base() != NULL) {
3029     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3030     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
3031     mov_slow(Rheap_base, Universe::narrow_klass_base());
3032     sub(r, r, Rheap_base);
3033   }
3034   if (Universe::narrow_klass_shift() != 0) {
3035     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3036     _lsr(r, r, Universe::narrow_klass_shift());
3037   }
3038   if (Universe::narrow_klass_base() != NULL) {
3039     reinit_heapbase();
3040   }
3041 }
3042 
3043 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3044 // must be changed.
3045 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3046   if (dst == src) {
3047     encode_klass_not_null(src);
3048     return;
3049   }
3050   if (Universe::narrow_klass_base() != NULL) {
3051     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
3052     sub(dst, src, dst);
3053     if (Universe::narrow_klass_shift() != 0) {
3054       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3055       _lsr(dst, dst, Universe::narrow_klass_shift());
3056     }
3057   } else {
3058     if (Universe::narrow_klass_shift() != 0) {
3059       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3060       _lsr(dst, src, Universe::narrow_klass_shift());
3061     } else {
3062       mov(dst, src);
3063     }
3064   }
3065 }
3066 
3067 // Function instr_count_for_decode_klass_not_null() counts the instructions
3068 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3069 // when (Universe::heap() != NULL).  Hence, if the instructions they
3070 // generate change, then this method needs to be updated.
3071 int MacroAssembler::instr_count_for_decode_klass_not_null() {
3072   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3073   assert(Universe::heap() != NULL, "java heap should be initialized");
3074   if (Universe::narrow_klass_base() != NULL) {
3075     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
3076       1 +                                                                 // add
3077       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
3078   } else {
3079     if (Universe::narrow_klass_shift() != 0) {
3080       return 1;
3081     }
3082   }
3083   return 0;
3084 }
3085 
3086 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3087 // must be changed.
3088 void MacroAssembler::decode_klass_not_null(Register r) {
3089   int off = offset();
3090   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3091   assert(Universe::heap() != NULL, "java heap should be initialized");
3092   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
3093   // Do not emit extra code here: instr_count_for_decode_klass_not_null() counts these instructions (see the assert below).
3094   // Also do not verify_oop, as this is called by verify_oop.
3095   if (Universe::narrow_klass_base() != NULL) {
3096     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3097     mov_slow(Rheap_base, Universe::narrow_klass_base());
3098     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
3099     reinit_heapbase();
3100   } else {
3101     if (Universe::narrow_klass_shift() != 0) {
3102       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3103       _lsl(r, r, Universe::narrow_klass_shift());
3104     }
3105   }
3106   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
3107 }
3108 
3109 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3110 // must be changed.
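// Two-register variant: dst receives the base and the shifted narrow klass
// from src is added in, so Rheap_base is left untouched.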
3111 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3112   if (src == dst) {
3113     decode_klass_not_null(src);
3114     return;
3115   }
3116 
3117   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3118   assert(Universe::heap() != NULL, "java heap should be initialized");
3119   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
3120   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
3121   // Also do not verify_oop as this is called by verify_oop.
3122   if (Universe::narrow_klass_base() != NULL) {
3123     mov_slow(dst, Universe::narrow_klass_base());
3124     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
3125   } else {
3126     _lsl(dst, src, Universe::narrow_klass_shift());
3127   }
3128 }
3129 
3130 
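// Reloads Rheap_base with the compressed-pointers base. Before the Java heap
// is initialized the base value is not yet known, so it is loaded indirectly
// through Universe::narrow_ptrs_base_addr() instead.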
3131 void MacroAssembler::reinit_heapbase() {
3132   if (UseCompressedOops || UseCompressedClassPointers) {
3133     if (Universe::heap() != NULL) {
3134       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
3135     } else {
3136       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
3137     }
3138   }
3139 }
3140 
3141 #ifdef ASSERT
3142 void MacroAssembler::verify_heapbase(const char* msg) {
3143   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
3144   // Keep the two in sync when modifying this code.
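  // The condition flags (NZCV) are saved and restored around the comparison
  // below, so that the check does not clobber them for the surrounding code.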
3145   assert (UseCompressedOops, "should be compressed");
3146   assert (Universe::heap() != NULL, "java heap should be initialized");
3147   if (CheckCompressedOops) {
3148     Label ok;
3149     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
3150     raw_push(Rtemp, ZR);
3151     mrs(Rtemp, Assembler::SysReg_NZCV);
3152     str(Rtemp, Address(SP, 1 * wordSize));
3153     mov_slow(Rtemp, Universe::narrow_ptrs_base());
3154     cmp(Rheap_base, Rtemp);
3155     b(ok, eq);
3156     stop(msg);
3157     bind(ok);
3158     ldr(Rtemp, Address(SP, 1 * wordSize));
3159     msr(Assembler::SysReg_NZCV, Rtemp);
3160     raw_pop(Rtemp, ZR);
3161     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
3162   }
3163 }
3164 #endif // ASSERT
3165 
3166 #endif // AARCH64
3167 
3168 #ifdef COMPILER2
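// Fast-path monitor enter used by C2 (the FastLock node). The sequence tries
// biased locking first (when enabled), then checks for a recursive stack lock,
// and otherwise attempts to install the BasicLock by CAS-ing the displaced
// mark word into the object header. The outcome is reported through the
// condition flags rather than in a register (EQ is expected to mean success).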
3169 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3170 {
3171   assert(VM_Version::supports_ldrex(), "ldrex/strex not supported on this CPU");
3172 
3173   Register Rmark      = Rscratch2;
3174 
3175   assert(Roop != Rscratch, "");
3176   assert(Roop != Rmark, "");
3177   assert(Rbox != Rscratch, "");
3178   assert(Rbox != Rmark, "");
3179 
3180   Label fast_lock, done;
3181 
3182   if (UseBiasedLocking && !UseOptoBiasInlining) {
3183     Label failed;
3184 #ifdef AARCH64
3185     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
3186 #else
3187     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3188 #endif
3189     bind(failed);
3190   }
3191 
3192   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3193   tst(Rmark, markOopDesc::unlocked_value);
3194   b(fast_lock, ne);
3195 
3196   // Check for recursive lock
3197   // See comments in InterpreterMacroAssembler::lock_object for
3198   // explanations on the fast recursive locking check.
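  // In short: the lock is recursive iff (Rmark - SP) has its low two bits clear
  // and is smaller than a page, i.e. the displaced header points into this
  // thread's own stack; in that case a zero displaced header is stored.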
3199 #ifdef AARCH64
3200   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3201   Assembler::LogicalImmediate imm(mask, false);
3202   mov(Rscratch, SP);
3203   sub(Rscratch, Rmark, Rscratch);
3204   ands(Rscratch, Rscratch, imm);
3205   b(done, ne); // exit with failure
3206   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
3207   b(done);
3208 
3209 #else
3210   // -1- test low 2 bits
3211   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3212   // -2- test (hdr - SP) if the low two bits are 0
3213   sub(Rscratch, Rmark, SP, eq);
3214   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3215   // If still 'eq' then recursive locking OK
3216   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
3217   b(done);
3218 #endif
3219 
3220   bind(fast_lock);
3221   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3222 
3223   bool allow_fallthrough_on_failure = true;
3224   bool one_shot = true;
3225   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3226 
3227   bind(done);
3228 
3229 }
3230 
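// Fast-path monitor exit, the counterpart of fast_lock above. A NULL displaced
// header in the BasicLock means the lock was taken recursively and there is
// nothing to undo; otherwise the saved header is CAS-ed back into the object.
// As with fast_lock, the result is left in the condition flags.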
3231 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3232 {
3233   assert(VM_Version::supports_ldrex(), "ldrex/strex not supported on this CPU");
3234 
3235   Register Rmark      = Rscratch2;
3236 
3237   assert(Roop != Rscratch, "");
3238   assert(Roop != Rmark, "");
3239   assert(Rbox != Rscratch, "");
3240   assert(Rbox != Rmark, "");
3241 
3242   Label done;
3243 
3244   if (UseBiasedLocking && !UseOptoBiasInlining) {
3245     biased_locking_exit(Roop, Rscratch, done);
3246   }
3247 
3248   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3249   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3250   cmp(Rmark, 0);
3251   b(done, eq);
3252 
3253   // Restore the object header
3254   bool allow_fallthrough_on_failure = true;
3255   bool one_shot = true;
3256   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3257 
3258   bind(done);
3259 
3260 }
3261 #endif // COMPILER2