1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/cardTable.hpp"
  33 #include "gc/shared/cardTableModRefBS.hpp"
  34 #include "gc/shared/collectedHeap.inline.hpp"
  35 #include "interpreter/interpreter.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "prims/methodHandles.hpp"
  39 #include "runtime/biasedLocking.hpp"
  40 #include "runtime/interfaceSupport.hpp"
  41 #include "runtime/objectMonitor.hpp"
  42 #include "runtime/os.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "runtime/stubRoutines.hpp"
  45 #include "utilities/macros.hpp"
  46 #if INCLUDE_ALL_GCS
  47 #include "gc/g1/g1BarrierSet.hpp"
  48 #include "gc/g1/g1CardTable.hpp"
  49 #include "gc/g1/g1CollectedHeap.inline.hpp"
  50 #include "gc/g1/heapRegion.hpp"
  51 #endif
  52 
  53 // Implementation of AddressLiteral
  54 
  55 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  56   switch (rtype) {
  57   case relocInfo::oop_type:
    // Oops are a special case. Normally they would get their own section,
    // but in cases like icBuffer they are literals in the code stream for
    // which we have no section. We use relocInfo::none so that we get a
    // literal address, which is always patchable.
  62     break;
  63   case relocInfo::external_word_type:
  64     _rspec = external_word_Relocation::spec(_target);
  65     break;
  66   case relocInfo::internal_word_type:
  67     _rspec = internal_word_Relocation::spec(_target);
  68     break;
  69   case relocInfo::opt_virtual_call_type:
  70     _rspec = opt_virtual_call_Relocation::spec();
  71     break;
  72   case relocInfo::static_call_type:
  73     _rspec = static_call_Relocation::spec();
  74     break;
  75   case relocInfo::runtime_call_type:
  76     _rspec = runtime_call_Relocation::spec();
  77     break;
  78   case relocInfo::poll_type:
  79   case relocInfo::poll_return_type:
  80     _rspec = Relocation::spec_simple(rtype);
  81     break;
  82   case relocInfo::none:
  83     break;
  84   default:
  85     ShouldNotReachHere();
  86     break;
  87   }
  88 }
  89 
  90 // Initially added to the Assembler interface as a pure virtual:
  91 //   RegisterConstant delayed_value(..)
  92 // for:
  93 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// This was subsequently modified to its present name and return type.
  95 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  96                                                       Register tmp,
  97                                                       int offset) {
  98   ShouldNotReachHere();
  99   return RegisterOrConstant(-1);
 100 }
 101 
 102 
 103 #ifdef AARCH64
 104 // Note: ARM32 version is OS dependent
 105 void MacroAssembler::breakpoint(AsmCondition cond) {
 106   if (cond == al) {
 107     brk();
 108   } else {
 109     Label L;
 110     b(L, inverse(cond));
 111     brk();
 112     bind(L);
 113   }
 114 }
 115 #endif // AARCH64
 116 
 117 
 118 // virtual method calling
 119 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 120                                            Register vtable_index,
 121                                            Register method_result) {
 122   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 123   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
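  // Computes method_result = *(recv_klass + vtable_start_offset + vtable_index * wordSize + method_offset),
  // i.e. the Method* stored in the selected vtable entry. Note that recv_klass is clobbered by the add below.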
 124   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 125   ldr(method_result, Address(recv_klass, base_offset));
 126 }
 127 
 128 
 129 // Simplified, combined version, good for typical uses.
 130 // Falls through on failure.
 131 void MacroAssembler::check_klass_subtype(Register sub_klass,
 132                                          Register super_klass,
 133                                          Register temp_reg,
 134                                          Register temp_reg2,
 135                                          Register temp_reg3,
 136                                          Label& L_success) {
 137   Label L_failure;
 138   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 139   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 140   bind(L_failure);
}
 142 
 143 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 144                                                    Register super_klass,
 145                                                    Register temp_reg,
 146                                                    Register temp_reg2,
 147                                                    Label* L_success,
 148                                                    Label* L_failure,
 149                                                    Label* L_slow_path) {
 150 
 151   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 152   const Register super_check_offset = temp_reg2;
 153 
 154   Label L_fallthrough;
 155   int label_nulls = 0;
 156   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 157   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 158   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 159   assert(label_nulls <= 1, "at most one NULL in the batch");
 160 
 161   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 162   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 163   Address super_check_offset_addr(super_klass, sco_offset);
 164 
 165   // If the pointers are equal, we are done (e.g., String[] elements).
 166   // This self-check enables sharing of secondary supertype arrays among
 167   // non-primary types such as array-of-interface.  Otherwise, each such
 168   // type would need its own customized SSA.
 169   // We move this check to the front of the fast path because many
 170   // type checks are in fact trivially successful in this manner,
 171   // so we get a nicely predicted branch right at the start of the check.
 172   cmp(sub_klass, super_klass);
 173   b(*L_success, eq);
 174 
 175   // Check the supertype display:
 176   ldr_u32(super_check_offset, super_check_offset_addr);
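  // super_check_offset is either an offset into the primary supers display (when super_klass
  // is a primary supertype) or the offset of the secondary_super_cache field (when it is a
  // secondary supertype), so the load below reads either a display element or the cached
  // secondary super.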
 177 
 178   Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr);   // load displayed supertype
  cmp(super_klass, temp_reg);
 181 
 182   // This check has worked decisively for primary supers.
 183   // Secondary supers are sought in the super_cache ('super_cache_addr').
 184   // (Secondary supers are interfaces and very deeply nested subtypes.)
 185   // This works in the same check above because of a tricky aliasing
 186   // between the super_cache and the primary super display elements.
 187   // (The 'super_check_addr' can address either, as the case requires.)
 188   // Note that the cache is updated below if it does not help us find
 189   // what we need immediately.
 190   // So if it was a primary super, we can just fail immediately.
 191   // Otherwise, it's the slow path for us (no success at this point).
 192 
 193   b(*L_success, eq);
 194   cmp_32(super_check_offset, sc_offset);
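  // Z is set here iff super_check_offset points at the secondary_super_cache field,
  // meaning the answer can only be found by scanning the secondary supers (slow path).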
 195   if (L_failure == &L_fallthrough) {
 196     b(*L_slow_path, eq);
 197   } else {
 198     b(*L_failure, ne);
 199     if (L_slow_path != &L_fallthrough) {
 200       b(*L_slow_path);
 201     }
 202   }
 203 
 204   bind(L_fallthrough);
 205 }
 206 
 207 
 208 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 209                                                    Register super_klass,
 210                                                    Register temp_reg,
 211                                                    Register temp2_reg,
 212                                                    Register temp3_reg,
 213                                                    Label* L_success,
 214                                                    Label* L_failure,
 215                                                    bool set_cond_codes) {
 216 #ifdef AARCH64
 217   NOT_IMPLEMENTED();
 218 #else
 219   // Note: if used by code that expects a register to be 0 on success,
 220   // this register must be temp_reg and set_cond_codes must be true
 221 
 222   Register saved_reg = noreg;
 223 
 224   // get additional tmp registers
 225   if (temp3_reg == noreg) {
 226     saved_reg = temp3_reg = LR;
 227     push(saved_reg);
 228   }
 229 
 230   assert(temp2_reg != noreg, "need all the temporary registers");
 231   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 232 
 233   Register cmp_temp = temp_reg;
 234   Register scan_temp = temp3_reg;
 235   Register count_temp = temp2_reg;
 236 
 237   Label L_fallthrough;
 238   int label_nulls = 0;
 239   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 240   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 241   assert(label_nulls <= 1, "at most one NULL in the batch");
 242 
 243   // a couple of useful fields in sub_klass:
 244   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 245   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 246   Address secondary_supers_addr(sub_klass, ss_offset);
 247   Address super_cache_addr(     sub_klass, sc_offset);
 248 
 249 #ifndef PRODUCT
 250   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 251 #endif
 252 
 253   // We will consult the secondary-super array.
 254   ldr(scan_temp, Address(sub_klass, ss_offset));
 255 
 256   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // With compressed oops unsupported here, search_key is simply the super_klass itself.
 258   Register search_key = super_klass;
 259 
 260   // Load the array length.
 261   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 262   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
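  // scan_temp now points at the first element of the secondary supers array;
  // count_temp holds the number of elements.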
 263 
 264   add(count_temp, count_temp, 1);
 265 
 266   Label L_loop, L_setnz_and_fail, L_fail;
 267 
 268   // Top of search loop
 269   bind(L_loop);
 270   // Notes:
 271   //  scan_temp starts at the array elements
 272   //  count_temp is 1+size
 273   subs(count_temp, count_temp, 1);
 274   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 275     // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
 277   } else {
 278     b(L_fail, eq); // not found in the array
 279   }
 280 
 281   // Load next super to check
  // Elements of the array of super classes are pointer-sized.
 283   int element_size = wordSize;
 284   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 285 
 286   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 287   subs(cmp_temp, cmp_temp, search_key);
 288 
 289   // A miss means we are NOT a subtype and need to keep looping
 290   b(L_loop, ne);
 291 
 292   // Falling out the bottom means we found a hit; we ARE a subtype
 293 
 294   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 295 
 296   // Success.  Cache the super we found and proceed in triumph.
 297   str(super_klass, Address(sub_klass, sc_offset));
 298 
 299   if (saved_reg != noreg) {
 300     // Return success
 301     pop(saved_reg);
 302   }
 303 
 304   b(*L_success);
 305 
 306   bind(L_fail);
 307   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 308   if (set_cond_codes) {
 309     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 310   }
 311   if (saved_reg != noreg) {
 312     pop(saved_reg);
 313   }
 314   if (L_failure != &L_fallthrough) {
 315     b(*L_failure);
 316   }
 317 
 318   bind(L_fallthrough);
 319 #endif
 320 }
 321 
 322 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 323 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 324   assert_different_registers(params_base, params_count);
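  // The computed address is params_base + (params_count - 1) * Interpreter::stackElementSize.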
 325   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 326   return Address(tmp, -Interpreter::stackElementSize);
 327 }
 328 
 329 
 330 void MacroAssembler::align(int modulus) {
 331   while (offset() % modulus != 0) {
 332     nop();
 333   }
 334 }
 335 
 336 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 337                                         Register last_java_fp,
 338                                         bool save_last_java_pc,
 339                                         Register tmp) {
 340   int pc_offset;
 341   if (last_java_fp != noreg) {
 342     // optional
 343     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 344     _fp_saved = true;
 345   } else {
 346     _fp_saved = false;
 347   }
 348   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 349 #ifdef AARCH64
 350     pc_offset = mov_pc_to(tmp);
 351     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 352 #else
 353     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 354     pc_offset = offset() + VM_Version::stored_pc_adjustment();
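    // Note: on 32-bit ARM, storing PC writes the address of the current instruction plus an
    // implementation-specific offset; stored_pc_adjustment() is assumed to compensate for that,
    // so pc_offset matches the value actually written to last_Java_pc.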
 355 #endif
 356     _pc_saved = true;
 357   } else {
 358     _pc_saved = false;
 359     pc_offset = -1;
 360   }
  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that
  // the other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but have not added a StoreStore
  // barrier.
 368 
 369   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 370   // to update oop_map offsets) and a StoreStore barrier might be needed.
 371 
 372   if (last_java_sp == noreg) {
 373     last_java_sp = SP; // always saved
 374   }
 375 #ifdef AARCH64
 376   if (last_java_sp == SP) {
 377     mov(tmp, SP);
 378     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 379   } else {
 380     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 381   }
 382 #else
 383   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 384 #endif
 385 
 386   return pc_offset; // for oopmaps
 387 }
 388 
 389 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 390   const Register Rzero = zero_register(tmp);
 391   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 392   if (_fp_saved) {
 393     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 394   }
 395   if (_pc_saved) {
 396     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 397   }
 398 }
 399 
 400 
 401 // Implementation of call_VM versions
 402 
 403 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 404   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 405   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 406 
 407 #ifndef AARCH64
 408   // Safer to save R9 here since callers may have been written
 409   // assuming R9 survives. This is suboptimal but is not worth
 410   // optimizing for the few platforms where R9 is scratched.
 411   push(RegisterSet(R4) | R9ifScratched);
 412   mov(R4, SP);
 413   bic(SP, SP, StackAlignmentInBytes - 1);
 414 #endif // AARCH64
 415   call(entry_point, relocInfo::runtime_call_type);
 416 #ifndef AARCH64
 417   mov(SP, R4);
 418   pop(RegisterSet(R4) | R9ifScratched);
 419 #endif // AARCH64
 420 }
 421 
 422 
 423 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 424   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 425   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 426 
 427   const Register tmp = Rtemp;
 428   assert_different_registers(oop_result, tmp);
 429 
 430   set_last_Java_frame(SP, FP, true, tmp);
 431 
 432 #ifdef ASSERT
 433   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 434 #endif // ASSERT
 435 
 436 #ifndef AARCH64
 437 #if R9_IS_SCRATCHED
 438   // Safer to save R9 here since callers may have been written
 439   // assuming R9 survives. This is suboptimal but is not worth
 440   // optimizing for the few platforms where R9 is scratched.
 441 
  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
 446   sub(SP, SP, 4);
 447   bic(SP, SP, StackAlignmentInBytes - 1);
 448   str(R9, Address(SP, 0));
 449 #else
 450   bic(SP, SP, StackAlignmentInBytes - 1);
 451 #endif // R9_IS_SCRATCHED
 452 #endif
 453 
 454   mov(R0, Rthread);
 455   call(entry_point, relocInfo::runtime_call_type);
 456 
 457 #ifndef AARCH64
 458 #if R9_IS_SCRATCHED
 459   ldr(R9, Address(SP, 0));
 460 #endif
 461   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 462 #endif
 463 
 464   reset_last_Java_frame(tmp);
 465 
 466   // C++ interp handles this in the interpreter
 467   check_and_handle_popframe();
 468   check_and_handle_earlyret();
 469 
 470   if (check_exceptions) {
 471     // check for pending exceptions
 472     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 473 #ifdef AARCH64
 474     Label L;
 475     cbz(tmp, L);
 476     mov_pc_to(Rexception_pc);
 477     b(StubRoutines::forward_exception_entry());
 478     bind(L);
 479 #else
 480     cmp(tmp, 0);
 481     mov(Rexception_pc, PC, ne);
 482     b(StubRoutines::forward_exception_entry(), ne);
 483 #endif // AARCH64
 484   }
 485 
 486   // get oop result if there is one and reset the value in the thread
 487   if (oop_result->is_valid()) {
 488     get_vm_result(oop_result, tmp);
 489   }
 490 }
 491 
 492 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 493   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 494 }
 495 
 496 
 497 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 498   assert (arg_1 == R1, "fixed register for arg_1");
 499   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 500 }
 501 
 502 
 503 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 504   assert (arg_1 == R1, "fixed register for arg_1");
 505   assert (arg_2 == R2, "fixed register for arg_2");
 506   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 507 }
 508 
 509 
 510 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 511   assert (arg_1 == R1, "fixed register for arg_1");
 512   assert (arg_2 == R2, "fixed register for arg_2");
 513   assert (arg_3 == R3, "fixed register for arg_3");
 514   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 515 }
 516 
 517 
 518 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 519   // Not used on ARM
 520   Unimplemented();
 521 }
 522 
 523 
 524 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 525   // Not used on ARM
 526   Unimplemented();
 527 }
 528 
 529 
 530 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
 532   Unimplemented();
 533 }
 534 
 535 
 536 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 537   // Not used on ARM
 538   Unimplemented();
 539 }
 540 
 541 // Raw call, without saving/restoring registers, exception handling, etc.
 542 // Mainly used from various stubs.
 543 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 544   const Register tmp = Rtemp; // Rtemp free since scratched by call
 545   set_last_Java_frame(SP, FP, true, tmp);
 546 #if R9_IS_SCRATCHED
 547   if (save_R9_if_scratched) {
    // Note: also saving R10 for alignment.
 549     push(RegisterSet(R9, R10));
 550   }
 551 #endif
 552   mov(R0, Rthread);
 553   call(entry_point, relocInfo::runtime_call_type);
 554 #if R9_IS_SCRATCHED
 555   if (save_R9_if_scratched) {
 556     pop(RegisterSet(R9, R10));
 557   }
 558 #endif
 559   reset_last_Java_frame(tmp);
 560 }
 561 
 562 void MacroAssembler::call_VM_leaf(address entry_point) {
 563   call_VM_leaf_helper(entry_point, 0);
 564 }
 565 
 566 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 567   assert (arg_1 == R0, "fixed register for arg_1");
 568   call_VM_leaf_helper(entry_point, 1);
 569 }
 570 
 571 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 572   assert (arg_1 == R0, "fixed register for arg_1");
 573   assert (arg_2 == R1, "fixed register for arg_2");
 574   call_VM_leaf_helper(entry_point, 2);
 575 }
 576 
 577 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 578   assert (arg_1 == R0, "fixed register for arg_1");
 579   assert (arg_2 == R1, "fixed register for arg_2");
 580   assert (arg_3 == R2, "fixed register for arg_3");
 581   call_VM_leaf_helper(entry_point, 3);
 582 }
 583 
 584 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 585   assert (arg_1 == R0, "fixed register for arg_1");
 586   assert (arg_2 == R1, "fixed register for arg_2");
 587   assert (arg_3 == R2, "fixed register for arg_3");
 588   assert (arg_4 == R3, "fixed register for arg_4");
 589   call_VM_leaf_helper(entry_point, 4);
 590 }
 591 
 592 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 593   assert_different_registers(oop_result, tmp);
 594   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 595   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 596   verify_oop(oop_result);
 597 }
 598 
 599 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 600   assert_different_registers(metadata_result, tmp);
 601   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 602   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 603 }
 604 
 605 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 606   if (arg2.is_register()) {
 607     add(dst, arg1, arg2.as_register());
 608   } else {
 609     add(dst, arg1, arg2.as_constant());
 610   }
 611 }
 612 
 613 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 614 #ifdef AARCH64
 615   if (c == 0) {
 616     if (rd != rn) {
 617       mov(rd, rn);
 618     }
 619     return;
 620   }
 621   if (c < 0) {
 622     sub_slow(rd, rn, -c);
 623     return;
 624   }
 625   if (c > right_n_bits(24)) {
 626     guarantee(rd != rn, "no large add_slow with only one register");
 627     mov_slow(rd, c);
 628     add(rd, rn, rd);
 629   } else {
 630     int lo = c & right_n_bits(12);
 631     int hi = (c >> 12) & right_n_bits(12);
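    // Split the (at most 24-bit) constant into two 12-bit immediates:
    // rd = rn + lo + (hi << 12), each piece encodable in a single add.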
 632     if (lo != 0) {
 633       add(rd, rn, lo, lsl0);
 634     }
 635     if (hi != 0) {
 636       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 637     }
 638   }
 639 #else
  // This function is used in the compiler for handling large frame offsets.
 641   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 642     return sub(rd, rn, (-c));
 643   }
 644   int low = c & 0x3fc;
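  // 0x3fc masks an 8-bit value shifted left by 2, which is always encodable
  // as an ARM rotated immediate.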
 645   if (low != 0) {
 646     add(rd, rn, low);
 647     rn = rd;
 648   }
 649   if (c & ~0x3fc) {
 650     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 651     add(rd, rn, c & ~0x3fc);
 652   } else if (rd != rn) {
 653     assert(c == 0, "");
 654     mov(rd, rn); // need to generate at least one move!
 655   }
 656 #endif // AARCH64
 657 }
 658 
 659 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 660 #ifdef AARCH64
 661   if (c <= 0) {
 662     add_slow(rd, rn, -c);
 663     return;
 664   }
 665   if (c > right_n_bits(24)) {
 666     guarantee(rd != rn, "no large sub_slow with only one register");
 667     mov_slow(rd, c);
 668     sub(rd, rn, rd);
 669   } else {
 670     int lo = c & right_n_bits(12);
 671     int hi = (c >> 12) & right_n_bits(12);
 672     if (lo != 0) {
 673       sub(rd, rn, lo, lsl0);
 674     }
 675     if (hi != 0) {
 676       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 677     }
 678   }
 679 #else
  // This function is used in the compiler for handling large frame offsets.
 681   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 682     return add(rd, rn, (-c));
 683   }
 684   int low = c & 0x3fc;
 685   if (low != 0) {
 686     sub(rd, rn, low);
 687     rn = rd;
 688   }
 689   if (c & ~0x3fc) {
 690     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 691     sub(rd, rn, c & ~0x3fc);
 692   } else if (rd != rn) {
 693     assert(c == 0, "");
 694     mov(rd, rn); // need to generate at least one move!
 695   }
 696 #endif // AARCH64
 697 }
 698 
 699 void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non-relocated mov_related_address
 701   mov_slow(rd, (intptr_t)addr);
 702 }
 703 
 704 void MacroAssembler::mov_slow(Register rd, const char *str) {
 705   mov_slow(rd, (intptr_t)str);
 706 }
 707 
 708 #ifdef AARCH64
 709 
 710 // Common code for mov_slow and instr_count_for_mov_slow.
// Returns the number of instructions in the mov_slow pattern,
// generating the code if a non-null MacroAssembler is given.
 713 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when modifying this code.
 716 
 717   const intx mask = right_n_bits(16);
 718   // 1 movz instruction
 719   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 720     if ((c & ~(mask << base_shift)) == 0) {
 721       if (masm != NULL) {
 722         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 723       }
 724       return 1;
 725     }
 726   }
 727   // 1 movn instruction
 728   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 729     if (((~c) & ~(mask << base_shift)) == 0) {
 730       if (masm != NULL) {
 731         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 732       }
 733       return 1;
 734     }
 735   }
 736   // 1 orr instruction
 737   {
 738     LogicalImmediate imm(c, false);
 739     if (imm.is_encoded()) {
 740       if (masm != NULL) {
 741         masm->orr(rd, ZR, imm);
 742       }
 743       return 1;
 744     }
 745   }
 746   // 1 movz/movn + up to 3 movk instructions
 747   int zeroes = 0;
 748   int ones = 0;
 749   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 750     int part = (c >> base_shift) & mask;
 751     if (part == 0) {
 752       ++zeroes;
 753     } else if (part == mask) {
 754       ++ones;
 755     }
 756   }
 757   int def_bits = 0;
 758   if (ones > zeroes) {
 759     def_bits = mask;
 760   }
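  // def_bits is the "background" 16-bit pattern produced by the initial movz (0x0000)
  // or movn (0xffff); only halfwords that differ from it need an extra instruction.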
 761   int inst_count = 0;
 762   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 763     int part = (c >> base_shift) & mask;
 764     if (part != def_bits) {
 765       if (masm != NULL) {
 766         if (inst_count > 0) {
 767           masm->movk(rd, part, base_shift);
 768         } else {
 769           if (def_bits == 0) {
 770             masm->movz(rd, part, base_shift);
 771           } else {
 772             masm->movn(rd, ~part & mask, base_shift);
 773           }
 774         }
 775       }
 776       inst_count++;
 777     }
 778   }
 779   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 780   return inst_count;
 781 }
 782 
 783 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 784 #ifdef ASSERT
 785   int off = offset();
 786 #endif
 787   (void) mov_slow_helper(rd, c, this);
 788   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 789 }
 790 
 791 // Counts instructions generated by mov_slow(rd, c).
 792 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 793   return mov_slow_helper(noreg, c, NULL);
 794 }
 795 
 796 int MacroAssembler::instr_count_for_mov_slow(address c) {
 797   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 798 }
 799 
 800 #else
 801 
 802 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 803   if (AsmOperand::is_rotated_imm(c)) {
 804     mov(rd, c, cond);
 805   } else if (AsmOperand::is_rotated_imm(~c)) {
 806     mvn(rd, ~c, cond);
 807   } else if (VM_Version::supports_movw()) {
 808     movw(rd, c & 0xffff, cond);
 809     if ((unsigned int)c >> 16) {
 810       movt(rd, (unsigned int)c >> 16, cond);
 811     }
 812   } else {
 813     // Find first non-zero bit
 814     int shift = 0;
 815     while ((c & (3 << shift)) == 0) {
 816       shift += 2;
 817     }
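    // The shift stays even because ARM rotated immediates are 8-bit values rotated
    // right by an even amount, so each chunk must start at an even bit position.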
 818     // Put the least significant part of the constant
 819     int mask = 0xff << shift;
 820     mov(rd, c & mask, cond);
 821     // Add up to 3 other parts of the constant;
 822     // each of them can be represented as rotated_imm
 823     if (c & (mask << 8)) {
 824       orr(rd, rd, c & (mask << 8), cond);
 825     }
 826     if (c & (mask << 16)) {
 827       orr(rd, rd, c & (mask << 16), cond);
 828     }
 829     if (c & (mask << 24)) {
 830       orr(rd, rd, c & (mask << 24), cond);
 831     }
 832   }
 833 }
 834 
 835 #endif // AARCH64
 836 
 837 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 838 #ifdef AARCH64
 839                              bool patchable
 840 #else
 841                              AsmCondition cond
 842 #endif
 843                              ) {
 844 
 845   if (o == NULL) {
 846 #ifdef AARCH64
 847     if (patchable) {
 848       nop();
 849     }
 850     mov(rd, ZR);
 851 #else
 852     mov(rd, 0, cond);
 853 #endif
 854     return;
 855   }
 856 
 857   if (oop_index == 0) {
 858     oop_index = oop_recorder()->allocate_oop_index(o);
 859   }
 860   relocate(oop_Relocation::spec(oop_index));
 861 
 862 #ifdef AARCH64
 863   if (patchable) {
 864     nop();
 865   }
 866   ldr(rd, pc());
 867 #else
 868   if (VM_Version::supports_movw()) {
 869     movw(rd, 0, cond);
 870     movt(rd, 0, cond);
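    // The zero immediates are placeholders; the oop relocation recorded above allows the
    // real address to be patched in later (presumably via NativeMovConstReg::set_data,
    // as in the non-movw path below).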
 871   } else {
 872     ldr(rd, Address(PC), cond);
 873     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 874     nop();
 875   }
 876 #endif
 877 }
 878 
 879 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 880   if (o == NULL) {
 881 #ifdef AARCH64
 882     if (patchable) {
 883       nop();
 884     }
 885 #endif
 886     mov(rd, 0);
 887     return;
 888   }
 889 
 890   if (metadata_index == 0) {
 891     metadata_index = oop_recorder()->allocate_metadata_index(o);
 892   }
 893   relocate(metadata_Relocation::spec(metadata_index));
 894 
 895 #ifdef AARCH64
 896   if (patchable) {
 897     nop();
 898   }
 899 #ifdef COMPILER2
 900   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 901     mov_slow(rd, (address)o);
 902     return;
 903   }
 904 #endif
 905   ldr(rd, pc());
 906 #else
 907   if (VM_Version::supports_movw()) {
 908     movw(rd, ((int)o) & 0xffff);
 909     movt(rd, (unsigned int)o >> 16);
 910   } else {
 911     ldr(rd, Address(PC));
 912     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 913     nop();
 914   }
 915 #endif // AARCH64
 916 }
 917 
 918 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 919   Label skip_constant;
 920   union {
 921     jfloat f;
 922     jint i;
 923   } accessor;
 924   accessor.f = c;
 925 
 926 #ifdef AARCH64
 927   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 928   Label L;
 929   ldr_s(fd, target(L));
 930   b(skip_constant);
 931   bind(L);
 932   emit_int32(accessor.i);
 933   bind(skip_constant);
 934 #else
 935   flds(fd, Address(PC), cond);
 936   b(skip_constant);
 937   emit_int32(accessor.i);
 938   bind(skip_constant);
 939 #endif // AARCH64
 940 }
 941 
 942 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 943   Label skip_constant;
 944   union {
 945     jdouble d;
 946     jint i[2];
 947   } accessor;
 948   accessor.d = c;
 949 
 950 #ifdef AARCH64
 951   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 952   Label L;
 953   ldr_d(fd, target(L));
 954   b(skip_constant);
 955   align(wordSize);
 956   bind(L);
 957   emit_int32(accessor.i[0]);
 958   emit_int32(accessor.i[1]);
 959   bind(skip_constant);
 960 #else
 961   fldd(fd, Address(PC), cond);
 962   b(skip_constant);
 963   emit_int32(accessor.i[0]);
 964   emit_int32(accessor.i[1]);
 965   bind(skip_constant);
 966 #endif // AARCH64
 967 }
 968 
 969 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 970   intptr_t addr = (intptr_t) address_of_global;
 971 #ifdef AARCH64
 972   assert((addr & 0x3) == 0, "address should be aligned");
 973 
 974   // FIXME: TODO
 975   if (false && page_reachable_from_cache(address_of_global)) {
 976     assert(false,"TODO: relocate");
 977     //relocate();
 978     adrp(reg, address_of_global);
 979     ldrsw(reg, Address(reg, addr & 0xfff));
 980   } else {
 981     mov_slow(reg, addr & ~0x3fff);
 982     ldrsw(reg, Address(reg, addr & 0x3fff));
 983   }
 984 #else
 985   mov_slow(reg, addr & ~0xfff);
 986   ldr(reg, Address(reg, addr & 0xfff));
 987 #endif
 988 }
 989 
 990 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 991 #ifdef AARCH64
 992   intptr_t addr = (intptr_t) address_of_global;
 993   assert ((addr & 0x7) == 0, "address should be aligned");
 994   mov_slow(reg, addr & ~0x7fff);
 995   ldr(reg, Address(reg, addr & 0x7fff));
 996 #else
 997   ldr_global_s32(reg, address_of_global);
 998 #endif
 999 }
1000 
1001 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
1002   intptr_t addr = (intptr_t) address_of_global;
1003   mov_slow(reg, addr & ~0xfff);
1004   ldrb(reg, Address(reg, addr & 0xfff));
1005 }
1006 
1007 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1008 #ifdef AARCH64
1009   switch (bits) {
1010     case  8: uxtb(rd, rn); break;
1011     case 16: uxth(rd, rn); break;
1012     case 32: mov_w(rd, rn); break;
1013     default: ShouldNotReachHere();
1014   }
1015 #else
1016   if (bits <= 8) {
1017     andr(rd, rn, (1 << bits) - 1);
1018   } else if (bits >= 24) {
1019     bic(rd, rn, -1 << bits);
1020   } else {
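    // For intermediate widths, where the mask is not encodable as a single immediate:
    // shift left then logically shift right to clear the upper (32 - bits) bits.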
1021     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1022     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1023   }
1024 #endif
1025 }
1026 
1027 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1028 #ifdef AARCH64
1029   switch (bits) {
1030     case  8: sxtb(rd, rn); break;
1031     case 16: sxth(rd, rn); break;
1032     case 32: sxtw(rd, rn); break;
1033     default: ShouldNotReachHere();
1034   }
1035 #else
1036   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1037   mov(rd, AsmOperand(rd, asr, 32 - bits));
1038 #endif
1039 }
1040 
1041 #ifndef AARCH64
1042 
1043 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1044                                Register rn_lo, Register rn_hi,
1045                                AsmCondition cond) {
1046   if (rd_lo != rn_hi) {
1047     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1048     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1049   } else if (rd_hi != rn_lo) {
1050     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1051     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1052   } else {
1053     eor(rd_lo, rd_hi, rd_lo, cond);
1054     eor(rd_hi, rd_lo, rd_hi, cond);
1055     eor(rd_lo, rd_hi, rd_lo, cond);
1056   }
1057 }
1058 
1059 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1060                                 Register rn_lo, Register rn_hi,
1061                                 AsmShift shift, Register count) {
1062   Register tmp;
1063   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1064     tmp = rd_lo;
1065   } else {
1066     tmp = rd_hi;
1067   }
1068   assert_different_registers(tmp, count, rn_lo, rn_hi);
1069 
1070   subs(tmp, count, 32);
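  // tmp = count - 32. 'pl' (count >= 32) means the result half computed first comes
  // entirely from the opposite input half; 'mi' (count < 32) means both input halves contribute.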
1071   if (shift == lsl) {
1072     assert_different_registers(rd_hi, rn_lo);
1073     assert_different_registers(count, rd_hi);
1074     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1075     rsb(tmp, count, 32, mi);
1076     if (rd_hi == rn_hi) {
1077       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1078       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1079     } else {
1080       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1081       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1082     }
1083     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1084   } else {
1085     assert_different_registers(rd_lo, rn_hi);
1086     assert_different_registers(rd_lo, count);
1087     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1088     rsb(tmp, count, 32, mi);
1089     if (rd_lo == rn_lo) {
1090       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1091       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1092     } else {
1093       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1094       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1095     }
1096     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1097   }
1098 }
1099 
1100 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1101                                 Register rn_lo, Register rn_hi,
1102                                 AsmShift shift, int count) {
1103   assert(count != 0 && (count & ~63) == 0, "must be");
1104 
1105   if (shift == lsl) {
1106     assert_different_registers(rd_hi, rn_lo);
1107     if (count >= 32) {
1108       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1109       mov(rd_lo, 0);
1110     } else {
1111       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1112       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1113       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1114     }
1115   } else {
1116     assert_different_registers(rd_lo, rn_hi);
1117     if (count >= 32) {
1118       if (count == 32) {
1119         mov(rd_lo, rn_hi);
1120       } else {
1121         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1122       }
1123       if (shift == asr) {
1124         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1125       } else {
1126         mov(rd_hi, 0);
1127       }
1128     } else {
1129       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1130       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1131       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1132     }
1133   }
1134 }
1135 #endif // !AARCH64
1136 
1137 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when modifying this code.
1140   if (!VerifyOops) return;
1141 
1142   char buffer[64];
1143 #ifdef COMPILER1
1144   if (CommentedAssembly) {
1145     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1146     block_comment(buffer);
1147   }
1148 #endif
1149   const char* msg_buffer = NULL;
1150   {
1151     ResourceMark rm;
1152     stringStream ss;
1153     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1154     msg_buffer = code_string(ss.as_string());
1155   }
1156 
1157   save_all_registers();
1158 
1159   if (reg != R2) {
1160       mov(R2, reg);                              // oop to verify
1161   }
1162   mov(R1, SP);                                   // register save area
1163 
1164   Label done;
1165   InlinedString Lmsg(msg_buffer);
1166   ldr_literal(R0, Lmsg);                         // message
1167 
1168   // call indirectly to solve generation ordering problem
1169   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1170   call(Rtemp);
1171 
1172   restore_all_registers();
1173 
1174   b(done);
1175 #ifdef COMPILER2
1176   int off = offset();
1177 #endif
1178   bind_literal(Lmsg);
1179 #ifdef COMPILER2
1180   if (offset() - off == 1 * wordSize) {
1181     // no padding, so insert nop for worst-case sizing
1182     nop();
1183   }
1184 #endif
1185   bind(done);
1186 }
1187 
1188 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1189   if (!VerifyOops) return;
1190 
1191   const char* msg_buffer = NULL;
1192   {
1193     ResourceMark rm;
1194     stringStream ss;
1195     if ((addr.base() == SP) && (addr.index()==noreg)) {
1196       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1197     } else {
1198       ss.print("verify_oop_addr: %s", s);
1199     }
1200     ss.print(" (%s:%d)", file, line);
1201     msg_buffer = code_string(ss.as_string());
1202   }
1203 
1204   int push_size = save_all_registers();
1205 
1206   if (addr.base() == SP) {
1207     // computes an addr that takes into account the push
1208     if (addr.index() != noreg) {
1209       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1210       add(new_base, SP, push_size);
1211       addr = addr.rebase(new_base);
1212     } else {
1213       addr = addr.plus_disp(push_size);
1214     }
1215   }
1216 
1217   ldr(R2, addr);                                 // oop to verify
1218   mov(R1, SP);                                   // register save area
1219 
1220   Label done;
1221   InlinedString Lmsg(msg_buffer);
1222   ldr_literal(R0, Lmsg);                         // message
1223 
1224   // call indirectly to solve generation ordering problem
1225   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1226   call(Rtemp);
1227 
1228   restore_all_registers();
1229 
1230   b(done);
1231   bind_literal(Lmsg);
1232   bind(done);
1233 }
1234 
1235 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1236   if (needs_explicit_null_check(offset)) {
1237 #ifdef AARCH64
1238     ldr(ZR, Address(reg));
1239 #else
1240     assert_different_registers(reg, tmp);
1241     if (tmp == noreg) {
1242       tmp = Rtemp;
1243       assert((! Thread::current()->is_Compiler_thread()) ||
1244              (! (ciEnv::current()->task() == NULL)) ||
1245              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1246              "Rtemp not available in C2"); // explicit tmp register required
1247       // XXX: could we mark the code buffer as not compatible with C2 ?
1248     }
1249     ldr(tmp, Address(reg));
1250 #endif
1251   }
1252 }
1253 
1254 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1255 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1256                                  RegisterOrConstant size_expression, Label& slow_case) {
1257   if (!Universe::heap()->supports_inline_contig_alloc()) {
1258     b(slow_case);
1259     return;
1260   }
1261 
1262   CollectedHeap* ch = Universe::heap();
1263 
1264   const Register top_addr = tmp1;
1265   const Register heap_end = tmp2;
1266 
1267   if (size_expression.is_register()) {
1268     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1269   } else {
1270     assert_different_registers(obj, obj_end, top_addr, heap_end);
1271   }
1272 
1273   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1274   if (load_const) {
1275     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1276   } else {
1277     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1278   }
1279   // Calculate new heap_top by adding the size of the object
1280   Label retry;
1281   bind(retry);
1282 
1283 #ifdef AARCH64
1284   ldxr(obj, top_addr);
1285 #else
1286   ldr(obj, Address(top_addr));
1287 #endif // AARCH64
1288 
1289   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1290   add_rc(obj_end, obj, size_expression);
1291   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1292   cmp(obj_end, obj);
1293   b(slow_case, lo);
1294   // Update heap_top if allocation succeeded
1295   cmp(obj_end, heap_end);
1296   b(slow_case, hi);
1297 
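  // obj holds the heap top sampled above; the store-exclusive / CAS below only succeeds if
  // the top is still unchanged. Otherwise another thread allocated concurrently and we retry.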
1298 #ifdef AARCH64
1299   stxr(heap_end/*scratched*/, obj_end, top_addr);
1300   cbnz_w(heap_end, retry);
1301 #else
1302   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1303   b(retry, ne);
1304 #endif // AARCH64
1305 }
1306 
1307 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1308 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1309                                  RegisterOrConstant size_expression, Label& slow_case) {
1310   const Register tlab_end = tmp1;
1311   assert_different_registers(obj, obj_end, tlab_end);
1312 
1313   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1314   ldr(tlab_end, Address(Rthread, JavaThread::tlab_current_end_offset()));
1315   add_rc(obj_end, obj, size_expression);
1316   cmp(obj_end, tlab_end);
1317   b(slow_case, hi);
1318   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1319 }
1320 
// Fills the memory region [start..end) with zeroes. Clobbers the `start` and `tmp` registers.
1322 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1323   Label loop;
1324   const Register ptr = start;
1325 
1326 #ifdef AARCH64
1327   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1328   const Register size = tmp;
1329   Label remaining, done;
1330 
1331   sub(size, end, start);
1332 
1333 #ifdef ASSERT
1334   { Label L;
1335     tst(size, wordSize - 1);
1336     b(L, eq);
1337     stop("size is not a multiple of wordSize");
1338     bind(L);
1339   }
1340 #endif // ASSERT
1341 
1342   subs(size, size, wordSize);
1343   b(remaining, le);
1344 
1345   // Zero by 2 words per iteration.
1346   bind(loop);
1347   subs(size, size, 2*wordSize);
1348   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1349   b(loop, gt);
1350 
1351   bind(remaining);
1352   b(done, ne);
1353   str(ZR, Address(ptr));
1354   bind(done);
1355 #else
1356   mov(tmp, 0);
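  // Compact three-instruction loop: the conditional post-indexed store and the
  // conditional branch only execute while ptr is below end (unsigned).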
1357   bind(loop);
1358   cmp(ptr, end);
1359   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1360   b(loop, lo);
1361 #endif // AARCH64
1362 }
1363 
1364 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1365 #ifdef AARCH64
1366   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1367   add_rc(tmp, tmp, size_in_bytes);
1368   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1369 #else
1370   // Bump total bytes allocated by this thread
1371   Label done;
1372 
1373   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1374   adds(tmp, tmp, size_in_bytes);
1375   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1376   b(done, cc);
1377 
  // Increment the high word and store the result single-copy atomically (an unlikely
  // scenario on typical embedded systems, as it means more than 4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers.
  // Such a request could be difficult to satisfy by allocating those registers at a higher
  // level, so the routine is ready to allocate a pair itself.
1381   Register low, high;
  // Select either R0/R1 or R2/R3
1383 
1384   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1385     low = R2;
1386     high  = R3;
1387   } else {
1388     low = R0;
1389     high  = R1;
1390   }
1391   push(RegisterSet(low, high));
1392 
1393   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1394   adds(low, low, size_in_bytes);
1395   adc(high, high, 0);
1396   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1397 
1398   pop(RegisterSet(low, high));
1399 
1400   bind(done);
1401 #endif // AARCH64
1402 }
1403 
1404 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1405   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1406   if (UseStackBanging) {
1407     const int page_size = os::vm_page_size();
1408 
1409     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1410     strb(R0, Address(tmp));
1411 #ifdef AARCH64
1412     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1413       sub(tmp, tmp, page_size);
1414       strb(R0, Address(tmp));
1415     }
1416 #else
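    // A stride of 0xff0 rather than the page size is used, presumably because it fits the
    // immediate offset range of strb while remaining below the typical 4KB page, so every
    // page of the frame still gets touched.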
1417     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1418       strb(R0, Address(tmp, -0xff0, pre_indexed));
1419     }
1420 #endif // AARCH64
1421   }
1422 }
1423 
1424 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1425   if (UseStackBanging) {
1426     Label loop;
1427 
1428     mov(tmp, SP);
1429     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1430 #ifdef AARCH64
1431     sub(tmp, tmp, Rsize);
1432     bind(loop);
1433     subs(Rsize, Rsize, os::vm_page_size());
1434     strb(ZR, Address(tmp, Rsize));
1435 #else
1436     bind(loop);
1437     subs(Rsize, Rsize, 0xff0);
1438     strb(R0, Address(tmp, -0xff0, pre_indexed));
1439 #endif // AARCH64
1440     b(loop, hi);
1441   }
1442 }
1443 
1444 void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when modifying this code.
1447 #ifdef COMPILER1
1448   if (CommentedAssembly) {
1449     block_comment("stop");
1450   }
1451 #endif
1452 
1453   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1454   InlinedString Lmsg(msg);
1455 
1456   // save all registers for further inspection
1457   save_all_registers();
1458 
1459   ldr_literal(R0, Lmsg);                     // message
1460   mov(R1, SP);                               // register save area
1461 
1462 #ifdef AARCH64
1463   ldr_literal(Rtemp, Ldebug);
1464   br(Rtemp);
1465 #else
1466   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1467 #endif // AARCH64
1468 
1469 #if defined(COMPILER2) && defined(AARCH64)
1470   int off = offset();
1471 #endif
1472   bind_literal(Lmsg);
1473   bind_literal(Ldebug);
1474 #if defined(COMPILER2) && defined(AARCH64)
1475   if (offset() - off == 2 * wordSize) {
1476     // no padding, so insert nop for worst-case sizing
1477     nop();
1478   }
1479 #endif
1480 }
1481 
1482 void MacroAssembler::warn(const char* msg) {
1483 #ifdef COMPILER1
1484   if (CommentedAssembly) {
1485     block_comment("warn");
1486   }
1487 #endif
1488 
1489   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1490   InlinedString Lmsg(msg);
1491   Label done;
1492 
1493   int push_size = save_caller_save_registers();
1494 
1495 #ifdef AARCH64
1496   // TODO-AARCH64 - get rid of extra debug parameters
1497   mov(R1, LR);
1498   mov(R2, FP);
1499   add(R3, SP, push_size);
1500 #endif
1501 
1502   ldr_literal(R0, Lmsg);                    // message
1503   ldr_literal(LR, Lwarn);                   // call warning
1504 
1505   call(LR);
1506 
1507   restore_caller_save_registers();
1508 
1509   b(done);
1510   bind_literal(Lmsg);
1511   bind_literal(Lwarn);
1512   bind(done);
1513 }
1514 
1515 
1516 int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when modifying this code.
1519 #ifdef AARCH64
1520   const Register tmp = Rtemp;
1521   raw_push(R30, ZR);
1522   for (int i = 28; i >= 0; i -= 2) {
1523       raw_push(as_Register(i), as_Register(i+1));
1524   }
1525   mov_pc_to(tmp);
1526   str(tmp, Address(SP, 31*wordSize));
1527   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1528   return 32*wordSize;
1529 #else
1530   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1531   return 15*wordSize;
1532 #endif // AARCH64
1533 }
1534 
1535 void MacroAssembler::restore_all_registers() {
1536 #ifdef AARCH64
1537   for (int i = 0; i <= 28; i += 2) {
1538     raw_pop(as_Register(i), as_Register(i+1));
1539   }
1540   raw_pop(R30, ZR);
1541 #else
1542   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1543   add(SP, SP, wordSize);                         // discard saved PC
1544 #endif // AARCH64
1545 }
1546 
1547 int MacroAssembler::save_caller_save_registers() {
1548 #ifdef AARCH64
1549   for (int i = 0; i <= 16; i += 2) {
1550     raw_push(as_Register(i), as_Register(i+1));
1551   }
1552   raw_push(R18, LR);
1553   return 20*wordSize;
1554 #else
1555 #if R9_IS_SCRATCHED
1556   // Save also R10 to preserve alignment
1557   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1558   return 8*wordSize;
1559 #else
1560   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1561   return 6*wordSize;
1562 #endif
1563 #endif // AARCH64
1564 }
1565 
1566 void MacroAssembler::restore_caller_save_registers() {
1567 #ifdef AARCH64
1568   raw_pop(R18, LR);
1569   for (int i = 16; i >= 0; i -= 2) {
1570     raw_pop(as_Register(i), as_Register(i+1));
1571   }
1572 #else
1573 #if R9_IS_SCRATCHED
1574   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1575 #else
1576   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1577 #endif
1578 #endif // AARCH64
1579 }
1580 
1581 void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
1583   JavaThread* thread = JavaThread::current();
1584   thread->set_thread_state(_thread_in_vm);
1585 
1586   if (ShowMessageBoxOnError) {
1587     ttyLocker ttyl;
1588     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1589       BytecodeCounter::print();
1590     }
1591     if (os::message_box(msg, "Execution stopped, print registers?")) {
1592 #ifdef AARCH64
1593       // saved registers: R0-R30, PC
1594       const int nregs = 32;
1595 #else
1596       // saved registers: R0-R12, LR, PC
1597       const int nregs = 15;
1598       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1599 #endif // AARCH64
1600 
1601       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1602         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1603       }
1604 
1605 #ifdef AARCH64
1606       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1607 #endif // AARCH64
1608 
1609       // derive original SP value from the address of register save area
1610       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1611     }
1612     BREAKPOINT;
1613   } else {
1614     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1615   }
1616   assert(false, "DEBUG MESSAGE: %s", msg);
1617   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1618 }
1619 
1620 void MacroAssembler::unimplemented(const char* what) {
1621   const char* buf = NULL;
1622   {
1623     ResourceMark rm;
1624     stringStream ss;
1625     ss.print("unimplemented: %s", what);
1626     buf = code_string(ss.as_string());
1627   }
1628   stop(buf);
1629 }
1630 
1631 
1632 // Implementation of FixedSizeCodeBlock
1633 
1634 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1635 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1636 }
1637 
1638 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1639   if (_enabled) {
1640     address curr_pc = _masm->pc();
1641 
1642     assert(_start < curr_pc, "invalid current pc");
1643     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1644 
1645     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1646     for (int i = 0; i < nops_count; i++) {
1647       _masm->nop();
1648     }
1649   }
1650 }
1651 
1652 #ifdef AARCH64
1653 
1654 // Serializes memory.
// The tmp register is not used on AArch64; the parameter is provided solely for compatibility with 32-bit ARM
1656 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1657   if (!os::is_MP()) return;
1658 
1659   // TODO-AARCH64 investigate dsb vs dmb effects
1660   if (order_constraint == StoreStore) {
1661     dmb(DMB_st);
1662   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1663     dmb(DMB_ld);
1664   } else {
1665     dmb(DMB_all);
1666   }
1667 }
1668 
1669 #else
1670 
1671 // Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
1674 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
1675 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1676                             Register tmp,
1677                             bool preserve_flags,
1678                             Register load_tgt) {
1679   if (!os::is_MP()) return;
1680 
1681   if (order_constraint == StoreStore) {
1682     dmb(DMB_st, tmp);
1683   } else if ((order_constraint & StoreLoad)  ||
1684              (order_constraint & LoadLoad)   ||
1685              (order_constraint & StoreStore) ||
1686              (load_tgt == noreg)             ||
1687              preserve_flags) {
1688     dmb(DMB_all, tmp);
1689   } else {
    // LoadStore: reordering of speculative stores is prohibited
1691 
1692     // By providing an ordered load target register, we avoid an extra memory load reference
1693     Label not_taken;
1694     bind(not_taken);
1695     cmp(load_tgt, load_tgt);
1696     b(not_taken, ne);
1697   }
1698 }
1699 
1700 #endif // AARCH64
1701 
1702 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1703 // on failure, so fall-through can only mean success.
1704 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
1706 // while C1/interpreter does.
1707 // TODO: measure if it makes a difference
1708 
1709 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1710   Register base, Register tmp, Label &slow_case,
1711   bool allow_fallthrough_on_failure, bool one_shot)
1712 {
1713 
1714   bool fallthrough_is_success = false;
1715 
1716   // ARM Litmus Test example does prefetching here.
1717   // TODO: investigate if it helps performance
1718 
1719   // The last store was to the displaced header, so to prevent
1720   // reordering we must issue a StoreStore or Release barrier before
1721   // the CAS store.
1722 
1723 #ifdef AARCH64
1724 
1725   Register Rscratch = tmp;
1726   Register Roop = base;
1727   Register mark = oldval;
1728   Register Rbox = newval;
1729   Label loop;
1730 
1731   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1732 
1733   // Instead of StoreStore here, we use store-release-exclusive below
1734 
1735   bind(loop);
1736 
1737   ldaxr(tmp, base);  // acquire
1738   cmp(tmp, oldval);
1739   b(slow_case, ne);
1740   stlxr(tmp, newval, base); // release
1741   if (one_shot) {
1742     cmp_w(tmp, 0);
1743   } else {
1744     cbnz_w(tmp, loop);
1745     fallthrough_is_success = true;
1746   }
1747 
1748   // MemBarAcquireLock would normally go here, but
1749   // we already do ldaxr+stlxr above, which has
1750   // Sequential Consistency
1751 
1752 #else
1753   membar(MacroAssembler::StoreStore, noreg);
1754 
1755   if (one_shot) {
1756     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1757     cmp(tmp, oldval);
1758     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1759     cmp(tmp, 0, eq);
1760   } else {
1761     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1762   }
1763 
1764   // MemBarAcquireLock barrier
1765   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1766   // but that doesn't prevent a load or store from floating up between
1767   // the load and store in the CAS sequence, so play it safe and
1768   // do a full fence.
1769   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1770 #endif
1771   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1772     b(slow_case, ne);
1773   }
1774 }
1775 
1776 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1777   Register base, Register tmp, Label &slow_case,
1778   bool allow_fallthrough_on_failure, bool one_shot)
1779 {
1780 
1781   bool fallthrough_is_success = false;
1782 
1783   assert_different_registers(oldval,newval,base,tmp);
1784 
1785 #ifdef AARCH64
1786   Label loop;
1787 
1788   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1789 
1790   bind(loop);
1791   ldxr(tmp, base);
1792   cmp(tmp, oldval);
1793   b(slow_case, ne);
1794   // MemBarReleaseLock barrier
1795   stlxr(tmp, newval, base);
1796   if (one_shot) {
1797     cmp_w(tmp, 0);
1798   } else {
1799     cbnz_w(tmp, loop);
1800     fallthrough_is_success = true;
1801   }
1802 #else
1803   // MemBarReleaseLock barrier
1804   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1805   // but that doesn't prevent a load or store from floating down between
1806   // the load and store in the CAS sequence, so play it safe and
1807   // do a full fence.
1808   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1809 
1810   if (one_shot) {
1811     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1812     cmp(tmp, oldval);
1813     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1814     cmp(tmp, 0, eq);
1815   } else {
1816     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1817   }
1818 #endif
1819   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1820     b(slow_case, ne);
1821   }
1822 
1823   // ExitEnter
1824   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1825   // barrier that follows volatile store.
1826   // TODO: Should be able to remove on armv8 if volatile loads
1827   // use the load-acquire instruction.
1828   membar(StoreLoad, noreg);
1829 }
1830 
1831 #ifndef PRODUCT
1832 
1833 // Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
1835 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1836   if (counter_addr != NULL) {
1837     InlinedAddress counter_addr_literal((address)counter_addr);
1838     Label done, retry;
1839     if (cond != al) {
1840       b(done, inverse(cond));
1841     }
1842 
1843 #ifdef AARCH64
1844     raw_push(R0, R1);
1845     raw_push(R2, ZR);
1846 
1847     ldr_literal(R0, counter_addr_literal);
1848 
1849     bind(retry);
1850     ldxr_w(R1, R0);
1851     add_w(R1, R1, 1);
1852     stxr_w(R2, R1, R0);
1853     cbnz_w(R2, retry);
1854 
1855     raw_pop(R2, ZR);
1856     raw_pop(R0, R1);
1857 #else
1858     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1859     ldr_literal(R0, counter_addr_literal);
1860 
1861     mrs(CPSR, Rtemp);
1862 
1863     bind(retry);
1864     ldr_s32(R1, Address(R0));
1865     add(R2, R1, 1);
1866     atomic_cas_bool(R1, R2, R0, 0, R3);
1867     b(retry, ne);
1868 
1869     msr(CPSR_fsxc, Rtemp);
1870 
1871     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1872 #endif // AARCH64
1873 
1874     b(done);
1875     bind_literal(counter_addr_literal);
1876 
1877     bind(done);
1878   }
1879 }
1880 
1881 #endif // !PRODUCT
1882 
1883 
1884 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1885 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1886 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1887                                                  Register tmp, Label& slow_case, int* counter_addr) {
1888 
1889   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1890 #ifdef ASSERT
1891   breakpoint(ne); // Fallthrough only on success
1892 #endif
1893 #ifndef PRODUCT
1894   if (counter_addr != NULL) {
1895     cond_atomic_inc32(al, counter_addr);
1896   }
1897 #endif // !PRODUCT
1898 }
1899 
1900 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1901                                          bool swap_reg_contains_mark,
1902                                          Register tmp2,
1903                                          Label& done, Label& slow_case,
1904                                          BiasedLockingCounters* counters) {
1905   // obj_reg must be preserved (at least) if the bias locking fails
1906   // tmp_reg is a temporary register
1907   // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
1909   //   have been fixed so that swap_reg no longer needs to be
1910   //   saved.
  // Rtemp is no longer scratched
1912 
1913   assert(UseBiasedLocking, "why call this otherwise?");
1914   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1915   guarantee(swap_reg!=tmp_reg, "invariant");
1916   assert(tmp_reg != noreg, "must supply tmp_reg");
1917 
1918 #ifndef PRODUCT
1919   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1920     counters = BiasedLocking::counters();
1921   }
1922 #endif
1923 
1924   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1925   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1926 
1927   // Biased locking
1928   // See whether the lock is currently biased toward our thread and
1929   // whether the epoch is still valid
1930   // Note that the runtime guarantees sufficient alignment of JavaThread
1931   // pointers to allow age to be placed into low bits
1932   // First check to see whether biasing is even enabled for this object
1933   Label cas_label;
1934 
1935   // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded into swap_reg then the null check
  // has already been performed and the offset is irrelevant.
1938   int null_check_offset = offset();
1939   if (!swap_reg_contains_mark) {
1940     ldr(swap_reg, mark_addr);
1941   }
1942 
  // On an MP platform, loads could return 'stale' values in some cases.
  // That is acceptable since either the CAS or the slow case path is taken in the worst case.
1945 
1946   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1947   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1948 
1949   b(cas_label, ne);
1950 
1951   // The bias pattern is present in the object's header. Need to check
1952   // whether the bias owner and the epoch are both still current.
1953   load_klass(tmp_reg, obj_reg);
1954   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1955   orr(tmp_reg, tmp_reg, Rthread);
1956   eor(tmp_reg, tmp_reg, swap_reg);
1957 
1958 #ifdef AARCH64
1959   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1960 #else
1961   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1962 #endif // AARCH64
1963 
1964 #ifndef PRODUCT
1965   if (counters != NULL) {
1966     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1967   }
1968 #endif // !PRODUCT
1969 
1970   b(done, eq);
1971 
1972   Label try_revoke_bias;
1973   Label try_rebias;
1974 
1975   // At this point we know that the header has the bias pattern and
1976   // that we are not the bias owner in the current epoch. We need to
1977   // figure out more details about the state of the header in order to
1978   // know what operations can be legally performed on the object's
1979   // header.
1980 
1981   // If the low three bits in the xor result aren't clear, that means
1982   // the prototype header is no longer biased and we have to revoke
1983   // the bias on this object.
1984   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1985   b(try_revoke_bias, ne);
1986 
1987   // Biasing is still enabled for this data type. See whether the
1988   // epoch of the current bias is still valid, meaning that the epoch
1989   // bits of the mark word are equal to the epoch bits of the
1990   // prototype header. (Note that the prototype header's epoch bits
1991   // only change at a safepoint.) If not, attempt to rebias the object
1992   // toward the current thread. Note that we must be absolutely sure
1993   // that the current epoch is invalid in order to do this because
1994   // otherwise the manipulations it performs on the mark word are
1995   // illegal.
1996   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1997   b(try_rebias, ne);
1998 
1999   // tmp_reg has the age, epoch and pattern bits cleared
2000   // The remaining (owner) bits are (Thread ^ current_owner)
2001 
2002   // The epoch of the current bias is still valid but we know nothing
2003   // about the owner; it might be set or it might be clear. Try to
2004   // acquire the bias of the object using an atomic operation. If this
2005   // fails we will go in to the runtime to revoke the object's bias.
2006   // Note that we first construct the presumed unbiased header so we
2007   // don't accidentally blow away another thread's valid bias.
2008 
  // Note that we know the owner is not ourselves. Hence, success can
  // only happen when the owner bits are 0
2011 
2012 #ifdef AARCH64
  // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
  // a cleared bit in the middle (cms bit). So it is loaded with a separate instruction.
2015   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2016   andr(swap_reg, swap_reg, tmp2);
2017 #else
2018   // until the assembler can be made smarter, we need to make some assumptions about the values
2019   // so we can optimize this:
2020   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2021 
2022   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2023   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2024 #endif // AARCH64
2025 
2026   orr(tmp_reg, swap_reg, Rthread); // new mark
2027 
2028   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2029         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2030 
2031   // If the biasing toward our thread failed, this means that
2032   // another thread succeeded in biasing it toward itself and we
2033   // need to revoke that bias. The revocation will occur in the
2034   // interpreter runtime in the slow case.
2035 
2036   b(done);
2037 
2038   bind(try_rebias);
2039 
2040   // At this point we know the epoch has expired, meaning that the
2041   // current "bias owner", if any, is actually invalid. Under these
2042   // circumstances _only_, we are allowed to use the current header's
2043   // value as the comparison value when doing the cas to acquire the
2044   // bias in the current epoch. In other words, we allow transfer of
2045   // the bias from one thread to another directly in this situation.
2046 
2047   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2048 
2049   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2050 
2051   // owner bits 'random'. Set them to Rthread.
2052 #ifdef AARCH64
2053   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2054   andr(tmp_reg, tmp_reg, tmp2);
2055 #else
2056   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2057   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2058 #endif // AARCH64
2059 
2060   orr(tmp_reg, tmp_reg, Rthread); // new mark
2061 
2062   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2063         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2064 
2065   // If the biasing toward our thread failed, then another thread
2066   // succeeded in biasing it toward itself and we need to revoke that
2067   // bias. The revocation will occur in the runtime in the slow case.
2068 
2069   b(done);
2070 
2071   bind(try_revoke_bias);
2072 
2073   // The prototype mark in the klass doesn't have the bias bit set any
2074   // more, indicating that objects of this data type are not supposed
2075   // to be biased any more. We are going to try to reset the mark of
2076   // this object to the prototype value and fall through to the
2077   // CAS-based locking scheme. Note that if our CAS fails, it means
2078   // that another thread raced us for the privilege of revoking the
2079   // bias of this particular object, so it's okay to continue in the
2080   // normal locking code.
2081 
2082   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2083 
2084   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2085 
2086   // owner bits 'random'. Clear them
2087 #ifdef AARCH64
2088   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2089   andr(tmp_reg, tmp_reg, tmp2);
2090 #else
2091   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2092   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2093 #endif // AARCH64
2094 
2095   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2096         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2097 
2098   // Fall through to the normal CAS-based lock, because no matter what
2099   // the result of the above CAS, some thread must have succeeded in
2100   // removing the bias bit from the object's header.
2101 
2102   bind(cas_label);
2103 
2104   return null_check_offset;
2105 }
2106 
2107 
2108 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2109   assert(UseBiasedLocking, "why call this otherwise?");
2110 
2111   // Check for biased locking unlock case, which is a no-op
2112   // Note: we do not have to check the thread ID for two reasons.
2113   // First, the interpreter checks for IllegalMonitorStateException at
2114   // a higher level. Second, if the bias was revoked while we held the
2115   // lock, the object could not be rebiased toward another thread, so
2116   // the bias bit would be clear.
2117   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2118 
2119   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2120   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2121   b(done, eq);
2122 }
2123 
2124 
2125 void MacroAssembler::resolve_jobject(Register value,
2126                                      Register tmp1,
2127                                      Register tmp2) {
2128   assert_different_registers(value, tmp1, tmp2);
2129   Label done, not_weak;
2130   cbz(value, done);             // Use NULL as-is.
2131   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2132   tbz(value, 0, not_weak);      // Test for jweak tag.
2133   // Resolve jweak.
2134   ldr(value, Address(value, -JNIHandles::weak_tag_value));
2135   verify_oop(value);
2136 #if INCLUDE_ALL_GCS
2137   if (UseG1GC) {
2138     g1_write_barrier_pre(noreg, // store_addr
2139                          noreg, // new_val
2140                          value, // pre_val
2141                          tmp1,  // tmp1
2142                          tmp2); // tmp2
2143     }
2144 #endif // INCLUDE_ALL_GCS
2145   b(done);
2146   bind(not_weak);
2147   // Resolve (untagged) jobject.
2148   ldr(value, Address(value));
2149   verify_oop(value);
2150   bind(done);
2151 }
2152 
2153 
2154 //////////////////////////////////////////////////////////////////////////////////
2155 
2156 #if INCLUDE_ALL_GCS
2157 
2158 // G1 pre-barrier.
2159 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2160 // If store_addr != noreg, then previous value is loaded from [store_addr];
2161 // in such case store_addr and new_val registers are preserved;
2162 // otherwise pre_val register is preserved.
2163 void MacroAssembler::g1_write_barrier_pre(Register store_addr,
2164                                           Register new_val,
2165                                           Register pre_val,
2166                                           Register tmp1,
2167                                           Register tmp2) {
2168   Label done;
2169   Label runtime;
2170 
2171   if (store_addr != noreg) {
2172     assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
2173   } else {
2174     assert (new_val == noreg, "should be");
2175     assert_different_registers(pre_val, tmp1, tmp2, noreg);
2176   }
2177 
2178   Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2179                                         SATBMarkQueue::byte_offset_of_active()));
2180   Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2181                                   SATBMarkQueue::byte_offset_of_index()));
2182   Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2183                                    SATBMarkQueue::byte_offset_of_buf()));
2184 
2185   // Is marking active?
2186   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
2187   ldrb(tmp1, in_progress);
2188   cbz(tmp1, done);
2189 
2190   // Do we need to load the previous value?
2191   if (store_addr != noreg) {
2192     load_heap_oop(pre_val, Address(store_addr, 0));
2193   }
2194 
2195   // Is the previous value null?
2196   cbz(pre_val, done);
2197 
2198   // Can we store original value in the thread's buffer?
2199   // Is index == 0?
2200   // (The index field is typed as size_t.)
2201 
2202   ldr(tmp1, index);           // tmp1 := *index_adr
2203   ldr(tmp2, buffer);
2204 
2205   subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
2206   b(runtime, lt);             // If negative, goto runtime
2207 
2208   str(tmp1, index);           // *index_adr := tmp1
2209 
2210   // Record the previous value
2211   str(pre_val, Address(tmp2, tmp1));
2212   b(done);
2213 
2214   bind(runtime);
2215 
2216   // save the live input values
2217 #ifdef AARCH64
2218   if (store_addr != noreg) {
2219     raw_push(store_addr, new_val);
2220   } else {
2221     raw_push(pre_val, ZR);
2222   }
2223 #else
2224   if (store_addr != noreg) {
2225     // avoid raw_push to support any ordering of store_addr and new_val
2226     push(RegisterSet(store_addr) | RegisterSet(new_val));
2227   } else {
2228     push(pre_val);
2229   }
2230 #endif // AARCH64
2231 
2232   if (pre_val != R0) {
2233     mov(R0, pre_val);
2234   }
2235   mov(R1, Rthread);
2236 
2237   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
2238 
2239 #ifdef AARCH64
2240   if (store_addr != noreg) {
2241     raw_pop(store_addr, new_val);
2242   } else {
2243     raw_pop(pre_val, ZR);
2244   }
2245 #else
2246   if (store_addr != noreg) {
2247     pop(RegisterSet(store_addr) | RegisterSet(new_val));
2248   } else {
2249     pop(pre_val);
2250   }
2251 #endif // AARCH64
2252 
2253   bind(done);
2254 }
2255 
2256 // G1 post-barrier.
2257 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2258 void MacroAssembler::g1_write_barrier_post(Register store_addr,
2259                                            Register new_val,
2260                                            Register tmp1,
2261                                            Register tmp2,
2262                                            Register tmp3) {
2263 
2264   Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2265                                         DirtyCardQueue::byte_offset_of_index()));
2266   Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2267                                    DirtyCardQueue::byte_offset_of_buf()));
2268 
2269   BarrierSet* bs = Universe::heap()->barrier_set();
2270   CardTableModRefBS* ctbs = barrier_set_cast<CardTableModRefBS>(bs);
2271   CardTable* ct = ctbs->card_table();
2272   Label done;
2273   Label runtime;
2274 
2275   // Does store cross heap regions?
2276 
2277   eor(tmp1, store_addr, new_val);
2278 #ifdef AARCH64
2279   logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
2280   cbz(tmp1, done);
2281 #else
2282   movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
2283   b(done, eq);
2284 #endif
2285 
2286   // crosses regions, storing NULL?
2287 
2288   cbz(new_val, done);
2289 
2290   // storing region crossing non-NULL, is card already dirty?
2291   const Register card_addr = tmp1;
2292   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
2293 
2294   mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
2295   add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));
2296 
2297   ldrb(tmp2, Address(card_addr));
2298   cmp(tmp2, (int)G1CardTable::g1_young_card_val());
2299   b(done, eq);
2300 
2301   membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
2302 
2303   assert(CardTable::dirty_card_val() == 0, "adjust this code");
2304   ldrb(tmp2, Address(card_addr));
2305   cbz(tmp2, done);
2306 
2307   // storing a region crossing, non-NULL oop, card is clean.
2308   // dirty card and log.
2309 
2310   strb(zero_register(tmp2), Address(card_addr));
2311 
2312   ldr(tmp2, queue_index);
2313   ldr(tmp3, buffer);
2314 
2315   subs(tmp2, tmp2, wordSize);
2316   b(runtime, lt); // go to runtime if now negative
2317 
2318   str(tmp2, queue_index);
2319 
2320   str(card_addr, Address(tmp3, tmp2));
2321   b(done);
2322 
2323   bind(runtime);
2324 
2325   if (card_addr != R0) {
2326     mov(R0, card_addr);
2327   }
2328   mov(R1, Rthread);
2329   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
2330 
2331   bind(done);
2332 }
2333 
2334 #endif // INCLUDE_ALL_GCS
2335 
2336 //////////////////////////////////////////////////////////////////////////////////
2337 
2338 #ifdef AARCH64
2339 
2340 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2341   switch (size_in_bytes) {
2342     case  8: ldr(dst, src); break;
2343     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2344     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2345     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2346     default: ShouldNotReachHere();
2347   }
2348 }
2349 
2350 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2351   switch (size_in_bytes) {
2352     case  8: str(src, dst);    break;
2353     case  4: str_32(src, dst); break;
2354     case  2: strh(src, dst);   break;
2355     case  1: strb(src, dst);   break;
2356     default: ShouldNotReachHere();
2357   }
2358 }
2359 
2360 #else
2361 
2362 void MacroAssembler::load_sized_value(Register dst, Address src,
2363                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2364   switch (size_in_bytes) {
2365     case  4: ldr(dst, src, cond); break;
2366     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2367     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2368     default: ShouldNotReachHere();
2369   }
2370 }
2371 
2372 
2373 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2374   switch (size_in_bytes) {
2375     case  4: str(src, dst, cond); break;
2376     case  2: strh(src, dst, cond);   break;
2377     case  1: strb(src, dst, cond);   break;
2378     default: ShouldNotReachHere();
2379   }
2380 }
2381 #endif // AARCH64
2382 
2383 // Look up the method for a megamorphic invokeinterface call.
2384 // The target method is determined by <Rinterf, Rindex>.
2385 // The receiver klass is in Rklass.
2386 // On success, the result will be in method_result, and execution falls through.
2387 // On failure, execution transfers to the given label.
2388 void MacroAssembler::lookup_interface_method(Register Rklass,
2389                                              Register Rintf,
2390                                              RegisterOrConstant itable_index,
2391                                              Register method_result,
2392                                              Register Rscan,
2393                                              Register Rtmp,
2394                                              Label& L_no_such_interface) {
2395 
2396   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2397 
2398   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2399   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2400 
2401   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2402   const int base = in_bytes(Klass::vtable_start_offset());
2403   const int scale = exact_log2(vtableEntry::size_in_bytes());
2404   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2405   add(Rscan, Rklass, base);
2406   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2407 
2408   // Search through the itable for an interface equal to incoming Rintf
2409   // itable looks like [intface][offset][intface][offset][intface][offset]
2410 
2411   Label loop;
2412   bind(loop);
2413   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2414 #ifdef AARCH64
2415   Label found;
2416   cmp(Rtmp, Rintf);
2417   b(found, eq);
2418   cbnz(Rtmp, loop);
2419 #else
2420   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2421   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2422   b(loop, ne);
2423 #endif // AARCH64
2424 
2425 #ifdef AARCH64
2426   b(L_no_such_interface);
2427   bind(found);
2428 #else
2429   // CF == 0 means we reached the end of itable without finding icklass
2430   b(L_no_such_interface, cc);
2431 #endif // !AARCH64
2432 
2433   if (method_result != noreg) {
2434     // Interface found at previous position of Rscan, now load the method
2435     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2436     if (itable_index.is_register()) {
2437       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2438       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2439       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2440       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2441     } else {
2442       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2443                           itableMethodEntry::method_offset_in_bytes();
2444       add_slow(method_result, Rklass, method_offset);
2445       ldr(method_result, Address(method_result, Rtmp));
2446     }
2447   }
2448 }
2449 
2450 #ifdef COMPILER2
2451 // TODO: 8 bytes at a time? pre-fetch?
2452 // Compare char[] arrays aligned to 4 bytes.
2453 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2454                                         Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
2456   Label Lvector, Lloop;
2457 
2458   // Note: limit contains number of bytes (2*char_elements) != 0.
2459   tst(limit, 0x2); // trailing character ?
2460   b(Lvector, eq);
2461 
2462   // compare the trailing char
2463   sub(limit, limit, sizeof(jchar));
2464   ldrh(chr1, Address(ary1, limit));
2465   ldrh(chr2, Address(ary2, limit));
2466   cmp(chr1, chr2);
2467   mov(result, 0, ne);     // not equal
2468   b(Ldone, ne);
2469 
2470   // only one char ?
2471   tst(limit, limit);
2472   mov(result, 1, eq);
2473   b(Ldone, eq);
2474 
  // word by word compare, don't need alignment check
2476   bind(Lvector);
2477 
2478   // Shift ary1 and ary2 to the end of the arrays, negate limit
2479   add(ary1, limit, ary1);
2480   add(ary2, limit, ary2);
2481   neg(limit, limit);
2482 
2483   bind(Lloop);
2484   ldr_u32(chr1, Address(ary1, limit));
2485   ldr_u32(chr2, Address(ary2, limit));
2486   cmp_32(chr1, chr2);
2487   mov(result, 0, ne);     // not equal
2488   b(Ldone, ne);
2489   adds(limit, limit, 2*sizeof(jchar));
2490   b(Lloop, ne);
2491 
2492   // Caller should set it:
2493   // mov(result_reg, 1);  //equal
2494 }
2495 #endif
2496 
2497 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2498   mov_slow(tmpreg1, counter_addr);
2499   ldr_s32(tmpreg2, tmpreg1);
2500   add_32(tmpreg2, tmpreg2, 1);
2501   str_32(tmpreg2, tmpreg1);
2502 }
2503 
2504 void MacroAssembler::floating_cmp(Register dst) {
2505 #ifdef AARCH64
2506   NOT_TESTED();
2507   cset(dst, gt);            // 1 if '>', else 0
2508   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2509 #else
2510   vmrs(dst, FPSCR);
2511   orr(dst, dst, 0x08000000);
2512   eor(dst, dst, AsmOperand(dst, lsl, 3));
2513   mov(dst, AsmOperand(dst, asr, 30));
2514 #endif
2515 }
2516 
2517 void MacroAssembler::restore_default_fp_mode() {
2518 #ifdef AARCH64
2519   msr(SysReg_FPCR, ZR);
2520 #else
2521 #ifndef __SOFTFP__
2522   // Round to Near mode, IEEE compatible, masked exceptions
2523   mov(Rtemp, 0);
2524   vmsr(FPSCR, Rtemp);
2525 #endif // !__SOFTFP__
2526 #endif // AARCH64
2527 }
2528 
2529 #ifndef AARCH64
2530 // 24-bit word range == 26-bit byte range
2531 bool check26(int offset) {
2532   // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
2534   int off1 = offset << 6 >> 8;
2535   int encoded = off1 & ((1<<24)-1);
2536   int decoded = encoded << 8 >> 6;
2537   return offset == decoded;
2538 }
2539 #endif // !AARCH64
2540 
2541 // Perform some slight adjustments so the default 32MB code cache
2542 // is fully reachable.
2543 static inline address first_cache_address() {
2544   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2545 }
2546 static inline address last_cache_address() {
2547   return CodeCache::high_bound() - Assembler::InstructionSize;
2548 }
2549 
2550 #ifdef AARCH64
2551 // Can we reach target using ADRP?
2552 bool MacroAssembler::page_reachable_from_cache(address target) {
2553   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2554   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2555   intptr_t addr = (intptr_t)target & ~0xfff;
2556 
2557   intptr_t loffset = addr - cl;
2558   intptr_t hoffset = addr - ch;
2559   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2560 }
2561 #endif
2562 
2563 // Can we reach target using unconditional branch or call from anywhere
2564 // in the code cache (because code can be relocated)?
2565 bool MacroAssembler::_reachable_from_cache(address target) {
2566 #ifdef __thumb__
2567   if ((1 & (intptr_t)target) != 0) {
2568     // Return false to avoid 'b' if we need switching to THUMB mode.
2569     return false;
2570   }
2571 #endif
2572 
2573   address cl = first_cache_address();
2574   address ch = last_cache_address();
2575 
2576   if (ForceUnreachable) {
2577     // Only addresses from CodeCache can be treated as reachable.
2578     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2579       return false;
2580     }
2581   }
2582 
2583   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2584   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2585 
2586 #ifdef AARCH64
2587   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2588 #else
2589   return check26(loffset - 8) && check26(hoffset - 8);
2590 #endif
2591 }
2592 
2593 bool MacroAssembler::reachable_from_cache(address target) {
2594   assert(CodeCache::contains(pc()), "not supported");
2595   return _reachable_from_cache(target);
2596 }
2597 
2598 // Can we reach the entire code cache from anywhere else in the code cache?
2599 bool MacroAssembler::_cache_fully_reachable() {
2600   address cl = first_cache_address();
2601   address ch = last_cache_address();
2602   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2603 }
2604 
2605 bool MacroAssembler::cache_fully_reachable() {
2606   assert(CodeCache::contains(pc()), "not supported");
2607   return _cache_fully_reachable();
2608 }
2609 
2610 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2611   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2612   if (reachable_from_cache(target)) {
2613     relocate(rtype);
2614     b(target NOT_AARCH64_ARG(cond));
2615     return;
2616   }
2617 
2618   // Note: relocate is not needed for the code below,
2619   // encoding targets in absolute format.
2620   if (ignore_non_patchable_relocations()) {
2621     rtype = relocInfo::none;
2622   }
2623 
2624 #ifdef AARCH64
2625   assert (scratch != noreg, "should be specified");
2626   InlinedAddress address_literal(target, rtype);
2627   ldr_literal(scratch, address_literal);
2628   br(scratch);
2629   int off = offset();
2630   bind_literal(address_literal);
2631 #ifdef COMPILER2
2632   if (offset() - off == wordSize) {
2633     // no padding, so insert nop for worst-case sizing
2634     nop();
2635   }
2636 #endif
2637 #else
2638   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2639     // Note: this version cannot be (atomically) patched
2640     mov_slow(scratch, (intptr_t)target, cond);
2641     bx(scratch, cond);
2642   } else {
2643     Label skip;
2644     InlinedAddress address_literal(target);
2645     if (cond != al) {
2646       b(skip, inverse(cond));
2647     }
2648     relocate(rtype);
2649     ldr_literal(PC, address_literal);
2650     bind_literal(address_literal);
2651     bind(skip);
2652   }
2653 #endif // AARCH64
2654 }
2655 
2656 // Similar to jump except that:
2657 // - near calls are valid only if any destination in the cache is near
2658 // - no movt/movw (not atomically patchable)
2659 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2660   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2661   if (cache_fully_reachable()) {
2662     // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
2664     assert(CodeCache::contains(target), "target might be too far");
2665     relocate(rtype);
2666     b(target NOT_AARCH64_ARG(cond));
2667     return;
2668   }
2669 
2670   // Discard the relocation information if not needed for CacheCompiledCode
2671   // since the next encodings are all in absolute format.
2672   if (ignore_non_patchable_relocations()) {
2673     rtype = relocInfo::none;
2674   }
2675 
2676 #ifdef AARCH64
2677   assert (scratch != noreg, "should be specified");
2678   InlinedAddress address_literal(target);
2679   relocate(rtype);
2680   ldr_literal(scratch, address_literal);
2681   br(scratch);
2682   int off = offset();
2683   bind_literal(address_literal);
2684 #ifdef COMPILER2
2685   if (offset() - off == wordSize) {
2686     // no padding, so insert nop for worst-case sizing
2687     nop();
2688   }
2689 #endif
2690 #else
2691   {
2692     Label skip;
2693     InlinedAddress address_literal(target);
2694     if (cond != al) {
2695       b(skip, inverse(cond));
2696     }
2697     relocate(rtype);
2698     ldr_literal(PC, address_literal);
2699     bind_literal(address_literal);
2700     bind(skip);
2701   }
2702 #endif // AARCH64
2703 }
2704 
2705 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2706   Register scratch = LR;
2707   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2708   if (reachable_from_cache(target)) {
2709     relocate(rspec);
2710     bl(target NOT_AARCH64_ARG(cond));
2711     return;
2712   }
2713 
2714   // Note: relocate is not needed for the code below,
2715   // encoding targets in absolute format.
2716   if (ignore_non_patchable_relocations()) {
2717     // This assumes the information was needed only for relocating the code.
2718     rspec = RelocationHolder::none;
2719   }
2720 
2721 #ifndef AARCH64
2722   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2723     // Note: this version cannot be (atomically) patched
2724     mov_slow(scratch, (intptr_t)target, cond);
2725     blx(scratch, cond);
2726     return;
2727   }
2728 #endif
2729 
2730   {
2731     Label ret_addr;
2732 #ifndef AARCH64
2733     if (cond != al) {
2734       b(ret_addr, inverse(cond));
2735     }
2736 #endif
2737 
2738 
2739 #ifdef AARCH64
2740     // TODO-AARCH64: make more optimal implementation
2741     // [ Keep in sync with MacroAssembler::call_size ]
2742     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2743     mov_slow(scratch, target);
2744     blr(scratch);
2745 #else
2746     InlinedAddress address_literal(target);
2747     relocate(rspec);
2748     adr(LR, ret_addr);
2749     ldr_literal(PC, address_literal);
2750 
2751     bind_literal(address_literal);
2752     bind(ret_addr);
2753 #endif
2754   }
2755 }
2756 
2757 #if defined(AARCH64) && defined(COMPILER2)
2758 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2759   // FIXME: mov_slow is variable-length
2760   if (!far) return 1; // bl
2761   if (patchable) return 2;  // ldr; blr
2762   return instr_count_for_mov_slow((intptr_t)target) + 1;
2763 }
2764 #endif
2765 
2766 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2767   assert(rspec.type() == relocInfo::static_call_type ||
2768          rspec.type() == relocInfo::none ||
2769          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2770 
2771   // Always generate the relocation information, needed for patching
2772   relocate(rspec); // used by NativeCall::is_call_before()
2773   if (cache_fully_reachable()) {
2774     // Note: this assumes that all possible targets (the initial one
2775     // and the addresses patched to) are all in the code cache.
2776     assert(CodeCache::contains(target), "target might be too far");
2777     bl(target);
2778   } else {
2779 #if defined(AARCH64) && defined(COMPILER2)
2780     if (c2) {
2781       // return address needs to match call_size().
2782       // no need to trash Rtemp
2783       int off = offset();
2784       Label skip_literal;
2785       InlinedAddress address_literal(target);
2786       ldr_literal(LR, address_literal);
2787       blr(LR);
2788       int ret_addr_offset = offset();
2789       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2790       b(skip_literal);
2791       int off2 = offset();
2792       bind_literal(address_literal);
2793       if (offset() - off2 == wordSize) {
2794         // no padding, so insert nop for worst-case sizing
2795         nop();
2796       }
2797       bind(skip_literal);
2798       return ret_addr_offset;
2799     }
2800 #endif
2801     Label ret_addr;
2802     InlinedAddress address_literal(target);
2803 #ifdef AARCH64
2804     ldr_literal(Rtemp, address_literal);
2805     adr(LR, ret_addr);
2806     br(Rtemp);
2807 #else
2808     adr(LR, ret_addr);
2809     ldr_literal(PC, address_literal);
2810 #endif
2811     bind_literal(address_literal);
2812     bind(ret_addr);
2813   }
2814   return offset();
2815 }
2816 
2817 // ((OopHandle)result).resolve();
2818 void MacroAssembler::resolve_oop_handle(Register result) {
2819   // OopHandle::resolve is an indirection.
2820   ldr(result, Address(result, 0));
2821 }
2822 
2823 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2824   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2825   ldr(tmp, Address(method, Method::const_offset()));
2826   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2827   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2828   ldr(mirror, Address(tmp, mirror_offset));
2829   resolve_oop_handle(mirror);
2830 }
2831 
2832 
2833 ///////////////////////////////////////////////////////////////////////////////
2834 
2835 // Compressed pointers
2836 
2837 #ifdef AARCH64
2838 
2839 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2840   if (UseCompressedClassPointers) {
2841     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2842     decode_klass_not_null(dst_klass);
2843   } else {
2844     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2845   }
2846 }
2847 
2848 #else
2849 
2850 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2851   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2852 }
2853 
2854 #endif // AARCH64
2855 
2856 // Blows src_klass.
2857 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2858 #ifdef AARCH64
2859   if (UseCompressedClassPointers) {
2860     assert(src_klass != dst_oop, "not enough registers");
2861     encode_klass_not_null(src_klass);
2862     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2863     return;
2864   }
2865 #endif // AARCH64
2866   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2867 }
2868 
2869 #ifdef AARCH64
2870 
2871 void MacroAssembler::store_klass_gap(Register dst) {
2872   if (UseCompressedClassPointers) {
2873     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2874   }
2875 }
2876 
2877 #endif // AARCH64
2878 
2879 
2880 void MacroAssembler::load_heap_oop(Register dst, Address src) {
2881 #ifdef AARCH64
2882   if (UseCompressedOops) {
2883     ldr_w(dst, src);
2884     decode_heap_oop(dst);
2885     return;
2886   }
2887 #endif // AARCH64
2888   ldr(dst, src);
2889 }
2890 
2891 // Blows src and flags.
2892 void MacroAssembler::store_heap_oop(Register src, Address dst) {
2893 #ifdef AARCH64
2894   if (UseCompressedOops) {
2895     assert(!dst.uses(src), "not enough registers");
2896     encode_heap_oop(src);
2897     str_w(src, dst);
2898     return;
2899   }
2900 #endif // AARCH64
2901   str(src, dst);
2902 }
2903 
2904 void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
2905 #ifdef AARCH64
2906   if (UseCompressedOops) {
2907     str_w(src, dst);
2908     return;
2909   }
2910 #endif // AARCH64
2911   str(src, dst);
2912 }
2913 
2914 
2915 #ifdef AARCH64
2916 
2917 // Algorithm must match oop.inline.hpp encode_heap_oop.
2918 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Keep it in sync when modifying this code.
2921   assert (UseCompressedOops, "must be compressed");
2922   assert (Universe::heap() != NULL, "java heap should be initialized");
2923 #ifdef ASSERT
2924   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2925 #endif
2926   verify_oop(src);
2927   if (Universe::narrow_oop_base() == NULL) {
2928     if (Universe::narrow_oop_shift() != 0) {
2929       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2930       _lsr(dst, src, Universe::narrow_oop_shift());
2931     } else if (dst != src) {
2932       mov(dst, src);
2933     }
2934   } else {
2935     tst(src, src);
2936     csel(dst, Rheap_base, src, eq);
2937     sub(dst, dst, Rheap_base);
2938     if (Universe::narrow_oop_shift() != 0) {
2939       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2940       _lsr(dst, dst, Universe::narrow_oop_shift());
2941     }
2942   }
2943 }
2944 
2945 // Same algorithm as oop.inline.hpp decode_heap_oop.
2946 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2947 #ifdef ASSERT
2948   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2949 #endif
2950   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2951   if (Universe::narrow_oop_base() != NULL) {
2952     tst(src, src);
2953     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2954     csel(dst, dst, ZR, ne);
2955   } else {
2956     _lsl(dst, src, Universe::narrow_oop_shift());
2957   }
2958   verify_oop(dst);
2959 }
2960 
2961 #ifdef COMPILER2
2962 // Algorithm must match oop.inline.hpp encode_heap_oop.
2963 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2964 // must be changed.
2965 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2966   assert (UseCompressedOops, "must be compressed");
2967   assert (Universe::heap() != NULL, "java heap should be initialized");
2968 #ifdef ASSERT
2969   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2970 #endif
2971   verify_oop(src);
2972   if (Universe::narrow_oop_base() == NULL) {
2973     if (Universe::narrow_oop_shift() != 0) {
2974       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2975       _lsr(dst, src, Universe::narrow_oop_shift());
2976     } else if (dst != src) {
      mov(dst, src);
2978     }
2979   } else {
2980     sub(dst, src, Rheap_base);
2981     if (Universe::narrow_oop_shift() != 0) {
2982       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2983       _lsr(dst, dst, Universe::narrow_oop_shift());
2984     }
2985   }
2986 }
2987 
// Same algorithm as oop.inline.hpp decode_heap_oop.
2989 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2990 // must be changed.
2991 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2992 #ifdef ASSERT
2993   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2994 #endif
2995   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2996   if (Universe::narrow_oop_base() != NULL) {
2997     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2998   } else {
2999     _lsl(dst, src, Universe::narrow_oop_shift());
3000   }
3001   verify_oop(dst);
3002 }
3003 
3004 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3005   assert(UseCompressedClassPointers, "should only be used for compressed header");
3006   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3007   int klass_index = oop_recorder()->find_index(k);
3008   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
3009 
3010   // Relocation with special format (see relocInfo_arm.hpp).
3011   relocate(rspec);
3012   narrowKlass encoded_k = Klass::encode_klass(k);
3013   movz(dst, encoded_k & 0xffff, 0);
3014   movk(dst, (encoded_k >> 16) & 0xffff, 16);
3015 }
3016 
3017 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3018   assert(UseCompressedOops, "should only be used for compressed header");
3019   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3020   int oop_index = oop_recorder()->find_index(obj);
3021   RelocationHolder rspec = oop_Relocation::spec(oop_index);
3022 
3023   relocate(rspec);
3024   movz(dst, 0xffff, 0);
3025   movk(dst, 0xffff, 16);
3026 }
3027 
3028 #endif // COMPILER2
3029 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3030 // must be changed.
3031 void MacroAssembler::encode_klass_not_null(Register r) {
3032   if (Universe::narrow_klass_base() != NULL) {
3033     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3034     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
3035     mov_slow(Rheap_base, Universe::narrow_klass_base());
3036     sub(r, r, Rheap_base);
3037   }
3038   if (Universe::narrow_klass_shift() != 0) {
3039     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3040     _lsr(r, r, Universe::narrow_klass_shift());
3041   }
3042   if (Universe::narrow_klass_base() != NULL) {
3043     reinit_heapbase();
3044   }
3045 }
3046 
3047 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3048 // must be changed.
3049 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3050   if (dst == src) {
3051     encode_klass_not_null(src);
3052     return;
3053   }
3054   if (Universe::narrow_klass_base() != NULL) {
3055     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
3056     sub(dst, src, dst);
3057     if (Universe::narrow_klass_shift() != 0) {
3058       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3059       _lsr(dst, dst, Universe::narrow_klass_shift());
3060     }
3061   } else {
3062     if (Universe::narrow_klass_shift() != 0) {
3063       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3064       _lsr(dst, src, Universe::narrow_klass_shift());
3065     } else {
3066       mov(dst, src);
3067     }
3068   }
3069 }
3070 
3071 // Function instr_count_for_decode_klass_not_null() counts the instructions
3072 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3073 // when (Universe::heap() != NULL).  Hence, if the instructions they
3074 // generate change, then this method needs to be updated.
3075 int MacroAssembler::instr_count_for_decode_klass_not_null() {
3076   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3077   assert(Universe::heap() != NULL, "java heap should be initialized");
3078   if (Universe::narrow_klass_base() != NULL) {
3079     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
3080       1 +                                                                 // add
3081       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
3082   } else {
3083     if (Universe::narrow_klass_shift() != 0) {
3084       return 1;
3085     }
3086   }
3087   return 0;
3088 }
3089 
3090 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3091 // must be changed.
3092 void MacroAssembler::decode_klass_not_null(Register r) {
3093   int off = offset();
3094   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3095   assert(Universe::heap() != NULL, "java heap should be initialized");
3096   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
3097   // Do not add instructions here: instr_count_for_decode_klass_not_null() counts exactly what is emitted below.
3098   // Also do not verify_oop as this is called by verify_oop.
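       // r = narrow_klass_base + (r << narrow_klass_shift)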
3099   if (Universe::narrow_klass_base() != NULL) {
3100     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3101     mov_slow(Rheap_base, Universe::narrow_klass_base());
3102     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
3103     reinit_heapbase();
3104   } else {
3105     if (Universe::narrow_klass_shift() != 0) {
3106       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3107       _lsl(r, r, Universe::narrow_klass_shift());
3108     }
3109   }
3110   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
3111 }
3112 
3113 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3114 // must be changed.
3115 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3116   if (src == dst) {
3117     decode_klass_not_null(src);
3118     return;
3119   }
3120 
3121   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3122   assert(Universe::heap() != NULL, "java heap should be initialized");
3123   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
3124   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
3125   // Also do not verify_oop as this is called by verify_oop.
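       // dst = narrow_klass_base + (src << narrow_klass_shift)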
3126   if (Universe::narrow_klass_base() != NULL) {
3127     mov_slow(dst, Universe::narrow_klass_base());
3128     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
3129   } else {
3130     _lsl(dst, src, Universe::narrow_klass_shift());
3131   }
3132 }
3133 
3134 
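     // Reload Rheap_base with the base value used for compressed oops/klasses.
     // Before the heap is initialized the final base is not yet known, so it is
     // loaded indirectly through narrow_ptrs_base_addr() rather than embedded
     // as an immediate.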
3135 void MacroAssembler::reinit_heapbase() {
3136   if (UseCompressedOops || UseCompressedClassPointers) {
3137     if (Universe::heap() != NULL) {
3138       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
3139     } else {
3140       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
3141     }
3142   }
3143 }
3144 
3145 #ifdef ASSERT
3146 void MacroAssembler::verify_heapbase(const char* msg) {
3147   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
3148   // Keep the two in sync when modifying this code.
3149   assert (UseCompressedOops, "should be compressed");
3150   assert (Universe::heap() != NULL, "java heap should be initialized");
3151   if (CheckCompressedOops) {
3152     Label ok;
3153     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
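         // Preserve Rtemp and the NZCV flags across the check: push Rtemp, save the
         // flags on the stack, compare Rheap_base with the expected base, and restore
         // both before continuing.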
3154     raw_push(Rtemp, ZR);
3155     mrs(Rtemp, Assembler::SysReg_NZCV);
3156     str(Rtemp, Address(SP, 1 * wordSize));
3157     mov_slow(Rtemp, Universe::narrow_ptrs_base());
3158     cmp(Rheap_base, Rtemp);
3159     b(ok, eq);
3160     stop(msg);
3161     bind(ok);
3162     ldr(Rtemp, Address(SP, 1 * wordSize));
3163     msr(Assembler::SysReg_NZCV, Rtemp);
3164     raw_pop(Rtemp, ZR);
3165     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
3166   }
3167 }
3168 #endif // ASSERT
3169 
3170 #endif // AARCH64
3171 
3172 #ifdef COMPILER2
3173 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3174 {
3175   assert(VM_Version::supports_ldrex(), "ldrex/strex is required for fast locking");
3176 
3177   Register Rmark      = Rscratch2;
3178 
3179   assert(Roop != Rscratch, "must be distinct registers");
3180   assert(Roop != Rmark, "must be distinct registers");
3181   assert(Rbox != Rscratch, "must be distinct registers");
3182   assert(Rbox != Rmark, "must be distinct registers");
3183 
3184   Label fast_lock, done;
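       // Fast path: try biased locking first (if enabled), then stack locking via a CAS
       // on the object header.  The condition flags at 'done' report success or failure,
       // so the generated code can branch to the slow path when needed.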
3185 
3186   if (UseBiasedLocking && !UseOptoBiasInlining) {
3187     Label failed;
3188 #ifdef AARCH64
3189     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
3190 #else
3191     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3192 #endif
3193     bind(failed);
3194   }
3195 
3196   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3197   tst(Rmark, markOopDesc::unlocked_value);
3198   b(fast_lock, ne);
3199 
3200   // Check for recursive lock
3201   // See comments in InterpreterMacroAssembler::lock_object for
3202   // explanations on the fast recursive locking check.
3203 #ifdef AARCH64
3204   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3205   Assembler::LogicalImmediate imm(mask, false);
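       // A zero result of the 'ands' below means the mark word is a stack pointer
       // (low two bits 00) within one page above SP, i.e. our own stack lock, so
       // this is a recursive lock.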
3206   mov(Rscratch, SP);
3207   sub(Rscratch, Rmark, Rscratch);
3208   ands(Rscratch, Rscratch, imm);
3209   b(done, ne); // exit with failure
3210   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
3211   b(done);
3212 
3213 #else
3214   // -1- test low 2 bits
3215   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3216   // -2- test (hdr - SP) if the low two bits are 0
3217   sub(Rscratch, Rmark, SP, eq);
3218   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3219   // If still 'eq' then recursive locking OK
3220   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
3221   b(done);
3222 #endif
3223 
3224   bind(fast_lock);
3225   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3226 
3227   bool allow_fallthrough_on_failure = true;
3228   bool one_shot = true;
3229   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3230 
3231   bind(done);
3232 
3233 }
3234 
3235 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
3236 {
3237   assert(VM_Version::supports_ldrex(), "ldrex/strex is required for fast unlocking");
3238 
3239   Register Rmark      = Rscratch2;
3240 
3241   assert(Roop != Rscratch, "must be distinct registers");
3242   assert(Roop != Rmark, "must be distinct registers");
3243   assert(Rbox != Rscratch, "must be distinct registers");
3244   assert(Rbox != Rmark, "must be distinct registers");
3245 
3246   Label done;
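       // Fast path: undo biased locking if needed, treat a NULL displaced header as a
       // recursive unlock (nothing to restore), otherwise CAS the displaced header
       // back into the object.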
3247 
3248   if (UseBiasedLocking && !UseOptoBiasInlining) {
3249     biased_locking_exit(Roop, Rscratch, done);
3250   }
3251 
3252   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3253   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3254   cmp(Rmark, 0);
3255   b(done, eq);
3256 
3257   // Restore the object header
3258   bool allow_fallthrough_on_failure = true;
3259   bool one_shot = true;
3260   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3261 
3262   bind(done);
3263 
3264 }
3265 #endif // COMPILER2