1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/cardTable.hpp"
  33 #include "gc/shared/cardTableBarrierSet.hpp"
  34 #include "gc/shared/collectedHeap.inline.hpp"
  35 #include "interpreter/interpreter.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "prims/methodHandles.hpp"
  39 #include "runtime/biasedLocking.hpp"
  40 #include "runtime/interfaceSupport.inline.hpp"
  41 #include "runtime/objectMonitor.hpp"
  42 #include "runtime/os.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "runtime/stubRoutines.hpp"
  45 #include "utilities/macros.hpp"
  46 #if INCLUDE_ALL_GCS
  47 #include "gc/g1/g1BarrierSet.hpp"
  48 #include "gc/g1/g1CardTable.hpp"
  49 #include "gc/g1/heapRegion.hpp"
  50 #endif
  51 
  52 // Implementation of AddressLiteral
  53 
  54 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  55   switch (rtype) {
  56   case relocInfo::oop_type:
  57     // Oops are a special case. Normally they would be their own section
  58     // but in cases like icBuffer they are literals in the code stream that
   59     // we don't have a section for. We use relocInfo::none so that we get a
   60     // literal address which is always patchable.
  61     break;
  62   case relocInfo::external_word_type:
  63     _rspec = external_word_Relocation::spec(_target);
  64     break;
  65   case relocInfo::internal_word_type:
  66     _rspec = internal_word_Relocation::spec(_target);
  67     break;
  68   case relocInfo::opt_virtual_call_type:
  69     _rspec = opt_virtual_call_Relocation::spec();
  70     break;
  71   case relocInfo::static_call_type:
  72     _rspec = static_call_Relocation::spec();
  73     break;
  74   case relocInfo::runtime_call_type:
  75     _rspec = runtime_call_Relocation::spec();
  76     break;
  77   case relocInfo::poll_type:
  78   case relocInfo::poll_return_type:
  79     _rspec = Relocation::spec_simple(rtype);
  80     break;
  81   case relocInfo::none:
  82     break;
  83   default:
  84     ShouldNotReachHere();
  85     break;
  86   }
  87 }
  88 
  89 // Initially added to the Assembler interface as a pure virtual:
  90 //   RegisterConstant delayed_value(..)
  91 // for:
  92 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
  93 // this was subsequently modified to its present name and return type
  94 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  95                                                       Register tmp,
  96                                                       int offset) {
  97   ShouldNotReachHere();
  98   return RegisterOrConstant(-1);
  99 }
 100 
 101 
 102 #ifdef AARCH64
 103 // Note: ARM32 version is OS dependent
 104 void MacroAssembler::breakpoint(AsmCondition cond) {
 105   if (cond == al) {
 106     brk();
 107   } else {
 108     Label L;
 109     b(L, inverse(cond));
 110     brk();
 111     bind(L);
 112   }
 113 }
 114 #endif // AARCH64
 115 
 116 
 117 // virtual method calling
 118 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 119                                            Register vtable_index,
 120                                            Register method_result) {
 121   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 122   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
 123   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 124   ldr(method_result, Address(recv_klass, base_offset));
 125 }
 126 
 127 
 128 // Simplified, combined version, good for typical uses.
 129 // Falls through on failure.
 130 void MacroAssembler::check_klass_subtype(Register sub_klass,
 131                                          Register super_klass,
 132                                          Register temp_reg,
 133                                          Register temp_reg2,
 134                                          Register temp_reg3,
 135                                          Label& L_success) {
 136   Label L_failure;
 137   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 138   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 139   bind(L_failure);
  140 }
 141 
 142 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 143                                                    Register super_klass,
 144                                                    Register temp_reg,
 145                                                    Register temp_reg2,
 146                                                    Label* L_success,
 147                                                    Label* L_failure,
 148                                                    Label* L_slow_path) {
 149 
 150   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 151   const Register super_check_offset = temp_reg2;
 152 
 153   Label L_fallthrough;
 154   int label_nulls = 0;
 155   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 156   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 157   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 158   assert(label_nulls <= 1, "at most one NULL in the batch");
 159 
 160   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 161   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 162   Address super_check_offset_addr(super_klass, sco_offset);
 163 
 164   // If the pointers are equal, we are done (e.g., String[] elements).
 165   // This self-check enables sharing of secondary supertype arrays among
 166   // non-primary types such as array-of-interface.  Otherwise, each such
 167   // type would need its own customized SSA.
 168   // We move this check to the front of the fast path because many
 169   // type checks are in fact trivially successful in this manner,
 170   // so we get a nicely predicted branch right at the start of the check.
 171   cmp(sub_klass, super_klass);
 172   b(*L_success, eq);
 173 
 174   // Check the supertype display:
 175   ldr_u32(super_check_offset, super_check_offset_addr);
 176 
 177   Address super_check_addr(sub_klass, super_check_offset);
  178   ldr(temp_reg, super_check_addr); // load displayed supertype
  179   cmp(super_klass, temp_reg);
 180 
 181   // This check has worked decisively for primary supers.
 182   // Secondary supers are sought in the super_cache ('super_cache_addr').
 183   // (Secondary supers are interfaces and very deeply nested subtypes.)
 184   // This works in the same check above because of a tricky aliasing
 185   // between the super_cache and the primary super display elements.
 186   // (The 'super_check_addr' can address either, as the case requires.)
 187   // Note that the cache is updated below if it does not help us find
 188   // what we need immediately.
 189   // So if it was a primary super, we can just fail immediately.
 190   // Otherwise, it's the slow path for us (no success at this point).
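        //
        // To restate the branching below informally (no extra logic is added here):
        // if the displayed supertype matched (eq) we go to L_success; otherwise, if
        // super_check_offset equals sc_offset, the slot we just compared was really
        // the secondary-super cache, so the slow path must scan the secondary
        // supers; any other offset is a definite miss, so we go to L_failure.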
 191 
 192   b(*L_success, eq);
 193   cmp_32(super_check_offset, sc_offset);
 194   if (L_failure == &L_fallthrough) {
 195     b(*L_slow_path, eq);
 196   } else {
 197     b(*L_failure, ne);
 198     if (L_slow_path != &L_fallthrough) {
 199       b(*L_slow_path);
 200     }
 201   }
 202 
 203   bind(L_fallthrough);
 204 }
 205 
 206 
 207 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 208                                                    Register super_klass,
 209                                                    Register temp_reg,
 210                                                    Register temp2_reg,
 211                                                    Register temp3_reg,
 212                                                    Label* L_success,
 213                                                    Label* L_failure,
 214                                                    bool set_cond_codes) {
 215 #ifdef AARCH64
 216   NOT_IMPLEMENTED();
 217 #else
 218   // Note: if used by code that expects a register to be 0 on success,
 219   // this register must be temp_reg and set_cond_codes must be true
 220 
 221   Register saved_reg = noreg;
 222 
 223   // get additional tmp registers
 224   if (temp3_reg == noreg) {
 225     saved_reg = temp3_reg = LR;
 226     push(saved_reg);
 227   }
 228 
 229   assert(temp2_reg != noreg, "need all the temporary registers");
 230   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 231 
 232   Register cmp_temp = temp_reg;
 233   Register scan_temp = temp3_reg;
 234   Register count_temp = temp2_reg;
 235 
 236   Label L_fallthrough;
 237   int label_nulls = 0;
 238   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 239   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 240   assert(label_nulls <= 1, "at most one NULL in the batch");
 241 
 242   // a couple of useful fields in sub_klass:
 243   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 244   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 245   Address secondary_supers_addr(sub_klass, ss_offset);
 246   Address super_cache_addr(     sub_klass, sc_offset);
 247 
 248 #ifndef PRODUCT
 249   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 250 #endif
 251 
 252   // We will consult the secondary-super array.
 253   ldr(scan_temp, Address(sub_klass, ss_offset));
 254 
 255   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  256   // else search_key is the uncompressed super_klass.
 257   Register search_key = super_klass;
 258 
 259   // Load the array length.
 260   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 261   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 262 
 263   add(count_temp, count_temp, 1);
 264 
 265   Label L_loop, L_setnz_and_fail, L_fail;
 266 
 267   // Top of search loop
 268   bind(L_loop);
 269   // Notes:
 270   //  scan_temp starts at the array elements
 271   //  count_temp is 1+size
 272   subs(count_temp, count_temp, 1);
 273   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 274     // direct jump to L_failure if failed and no cleanup needed
  275     b(*L_failure, eq); // not found and no cleanup needed
 276   } else {
 277     b(L_fail, eq); // not found in the array
 278   }
 279 
 280   // Load next super to check
 281   // In the array of super classes elements are pointer sized.
 282   int element_size = wordSize;
 283   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 284 
 285   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 286   subs(cmp_temp, cmp_temp, search_key);
 287 
 288   // A miss means we are NOT a subtype and need to keep looping
 289   b(L_loop, ne);
 290 
 291   // Falling out the bottom means we found a hit; we ARE a subtype
 292 
 293   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 294 
 295   // Success.  Cache the super we found and proceed in triumph.
 296   str(super_klass, Address(sub_klass, sc_offset));
 297 
 298   if (saved_reg != noreg) {
  299     // Restore the saved register before returning success
 300     pop(saved_reg);
 301   }
 302 
 303   b(*L_success);
 304 
 305   bind(L_fail);
 306   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 307   if (set_cond_codes) {
 308     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 309   }
 310   if (saved_reg != noreg) {
 311     pop(saved_reg);
 312   }
 313   if (L_failure != &L_fallthrough) {
 314     b(*L_failure);
 315   }
 316 
 317   bind(L_fallthrough);
 318 #endif
 319 }
 320 
 321 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 322 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 323   assert_different_registers(params_base, params_count);
 324   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 325   return Address(tmp, -Interpreter::stackElementSize);
 326 }
 327 
 328 
 329 void MacroAssembler::align(int modulus) {
 330   while (offset() % modulus != 0) {
 331     nop();
 332   }
 333 }
 334 
 335 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 336                                         Register last_java_fp,
 337                                         bool save_last_java_pc,
 338                                         Register tmp) {
 339   int pc_offset;
 340   if (last_java_fp != noreg) {
 341     // optional
 342     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 343     _fp_saved = true;
 344   } else {
 345     _fp_saved = false;
 346   }
 347   if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
 348 #ifdef AARCH64
 349     pc_offset = mov_pc_to(tmp);
 350     str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
 351 #else
 352     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
 353     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 354 #endif
 355     _pc_saved = true;
 356   } else {
 357     _pc_saved = false;
 358     pc_offset = -1;
 359   }
  360   // According to the comment in javaFrameAnchor, SP must be saved last, so that the
  361   // other entries are valid when SP is set.
  362 
  363   // However, this is probably not a strong constraint since, for instance, PC is
  364   // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  365   // we now write the fields in the expected order but have not added a StoreStore
  366   // barrier.
 367 
 368   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 369   // to update oop_map offsets) and a StoreStore barrier might be needed.
 370 
 371   if (last_java_sp == noreg) {
 372     last_java_sp = SP; // always saved
 373   }
 374 #ifdef AARCH64
 375   if (last_java_sp == SP) {
 376     mov(tmp, SP);
 377     str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 378   } else {
 379     str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 380   }
 381 #else
 382   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 383 #endif
 384 
 385   return pc_offset; // for oopmaps
 386 }
 387 
 388 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 389   const Register Rzero = zero_register(tmp);
 390   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 391   if (_fp_saved) {
 392     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 393   }
 394   if (_pc_saved) {
 395     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 396   }
 397 }
 398 
 399 
 400 // Implementation of call_VM versions
 401 
 402 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 403   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 404   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 405 
 406 #ifndef AARCH64
 407   // Safer to save R9 here since callers may have been written
 408   // assuming R9 survives. This is suboptimal but is not worth
 409   // optimizing for the few platforms where R9 is scratched.
 410   push(RegisterSet(R4) | R9ifScratched);
 411   mov(R4, SP);
 412   bic(SP, SP, StackAlignmentInBytes - 1);
 413 #endif // AARCH64
 414   call(entry_point, relocInfo::runtime_call_type);
 415 #ifndef AARCH64
 416   mov(SP, R4);
 417   pop(RegisterSet(R4) | R9ifScratched);
 418 #endif // AARCH64
 419 }
 420 
 421 
 422 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 423   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 424   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 425 
 426   const Register tmp = Rtemp;
 427   assert_different_registers(oop_result, tmp);
 428 
 429   set_last_Java_frame(SP, FP, true, tmp);
 430 
 431 #ifdef ASSERT
 432   AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
 433 #endif // ASSERT
 434 
 435 #ifndef AARCH64
 436 #if R9_IS_SCRATCHED
 437   // Safer to save R9 here since callers may have been written
 438   // assuming R9 survives. This is suboptimal but is not worth
 439   // optimizing for the few platforms where R9 is scratched.
 440 
  441   // Note: R9 cannot be saved above the saved SP (some callees expect, for
  442   // instance, the Java stack top at the saved SP)
  443   // => once SP is saved (with set_last_Java_frame), decrease SP before rounding
  444   // to ensure the slot at the new SP is free for R9.
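        //
        // For illustration, the resulting layout is roughly (stack grows down):
        //   [SP at entry]           <- value recorded as last_Java_sp above
        //   ... one word + alignment padding ...
        //   [saved R9]              <- SP after the bic below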
 445   sub(SP, SP, 4);
 446   bic(SP, SP, StackAlignmentInBytes - 1);
 447   str(R9, Address(SP, 0));
 448 #else
 449   bic(SP, SP, StackAlignmentInBytes - 1);
 450 #endif // R9_IS_SCRATCHED
 451 #endif
 452 
 453   mov(R0, Rthread);
 454   call(entry_point, relocInfo::runtime_call_type);
 455 
 456 #ifndef AARCH64
 457 #if R9_IS_SCRATCHED
 458   ldr(R9, Address(SP, 0));
 459 #endif
 460   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 461 #endif
 462 
 463   reset_last_Java_frame(tmp);
 464 
 465   // C++ interp handles this in the interpreter
 466   check_and_handle_popframe();
 467   check_and_handle_earlyret();
 468 
 469   if (check_exceptions) {
 470     // check for pending exceptions
 471     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 472 #ifdef AARCH64
 473     Label L;
 474     cbz(tmp, L);
 475     mov_pc_to(Rexception_pc);
 476     b(StubRoutines::forward_exception_entry());
 477     bind(L);
 478 #else
 479     cmp(tmp, 0);
 480     mov(Rexception_pc, PC, ne);
 481     b(StubRoutines::forward_exception_entry(), ne);
 482 #endif // AARCH64
 483   }
 484 
 485   // get oop result if there is one and reset the value in the thread
 486   if (oop_result->is_valid()) {
 487     get_vm_result(oop_result, tmp);
 488   }
 489 }
 490 
 491 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 492   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 493 }
 494 
 495 
 496 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 497   assert (arg_1 == R1, "fixed register for arg_1");
 498   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 499 }
 500 
 501 
 502 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 503   assert (arg_1 == R1, "fixed register for arg_1");
 504   assert (arg_2 == R2, "fixed register for arg_2");
 505   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 506 }
 507 
 508 
 509 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 510   assert (arg_1 == R1, "fixed register for arg_1");
 511   assert (arg_2 == R2, "fixed register for arg_2");
 512   assert (arg_3 == R3, "fixed register for arg_3");
 513   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 514 }
 515 
 516 
 517 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 518   // Not used on ARM
 519   Unimplemented();
 520 }
 521 
 522 
 523 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 524   // Not used on ARM
 525   Unimplemented();
 526 }
 527 
 528 
 529 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  530   // Not used on ARM
 531   Unimplemented();
 532 }
 533 
 534 
 535 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 536   // Not used on ARM
 537   Unimplemented();
 538 }
 539 
 540 // Raw call, without saving/restoring registers, exception handling, etc.
 541 // Mainly used from various stubs.
 542 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 543   const Register tmp = Rtemp; // Rtemp free since scratched by call
 544   set_last_Java_frame(SP, FP, true, tmp);
 545 #if R9_IS_SCRATCHED
 546   if (save_R9_if_scratched) {
 547     // Note: Saving also R10 for alignment.
 548     push(RegisterSet(R9, R10));
 549   }
 550 #endif
 551   mov(R0, Rthread);
 552   call(entry_point, relocInfo::runtime_call_type);
 553 #if R9_IS_SCRATCHED
 554   if (save_R9_if_scratched) {
 555     pop(RegisterSet(R9, R10));
 556   }
 557 #endif
 558   reset_last_Java_frame(tmp);
 559 }
 560 
 561 void MacroAssembler::call_VM_leaf(address entry_point) {
 562   call_VM_leaf_helper(entry_point, 0);
 563 }
 564 
 565 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 566   assert (arg_1 == R0, "fixed register for arg_1");
 567   call_VM_leaf_helper(entry_point, 1);
 568 }
 569 
 570 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 571   assert (arg_1 == R0, "fixed register for arg_1");
 572   assert (arg_2 == R1, "fixed register for arg_2");
 573   call_VM_leaf_helper(entry_point, 2);
 574 }
 575 
 576 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 577   assert (arg_1 == R0, "fixed register for arg_1");
 578   assert (arg_2 == R1, "fixed register for arg_2");
 579   assert (arg_3 == R2, "fixed register for arg_3");
 580   call_VM_leaf_helper(entry_point, 3);
 581 }
 582 
 583 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 584   assert (arg_1 == R0, "fixed register for arg_1");
 585   assert (arg_2 == R1, "fixed register for arg_2");
 586   assert (arg_3 == R2, "fixed register for arg_3");
 587   assert (arg_4 == R3, "fixed register for arg_4");
 588   call_VM_leaf_helper(entry_point, 4);
 589 }
 590 
 591 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 592   assert_different_registers(oop_result, tmp);
 593   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 594   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 595   verify_oop(oop_result);
 596 }
 597 
 598 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 599   assert_different_registers(metadata_result, tmp);
 600   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 601   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 602 }
 603 
 604 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 605   if (arg2.is_register()) {
 606     add(dst, arg1, arg2.as_register());
 607   } else {
 608     add(dst, arg1, arg2.as_constant());
 609   }
 610 }
 611 
 612 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 613 #ifdef AARCH64
 614   if (c == 0) {
 615     if (rd != rn) {
 616       mov(rd, rn);
 617     }
 618     return;
 619   }
 620   if (c < 0) {
 621     sub_slow(rd, rn, -c);
 622     return;
 623   }
 624   if (c > right_n_bits(24)) {
 625     guarantee(rd != rn, "no large add_slow with only one register");
 626     mov_slow(rd, c);
 627     add(rd, rn, rd);
 628   } else {
 629     int lo = c & right_n_bits(12);
 630     int hi = (c >> 12) & right_n_bits(12);
 631     if (lo != 0) {
 632       add(rd, rn, lo, lsl0);
 633     }
 634     if (hi != 0) {
 635       add(rd, (lo == 0) ? rn : rd, hi, lsl12);
 636     }
 637   }
 638 #else
  639   // This function is used in the compiler for handling large frame offsets
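        // For illustration: c == 0x1004 is emitted as
        //   add(rd, rn, 0x004); add(rd, rd, 0x1000);
        // while c == -8 folds into a single sub(rd, rn, 8).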
 640   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 641     return sub(rd, rn, (-c));
 642   }
 643   int low = c & 0x3fc;
 644   if (low != 0) {
 645     add(rd, rn, low);
 646     rn = rd;
 647   }
 648   if (c & ~0x3fc) {
 649     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 650     add(rd, rn, c & ~0x3fc);
 651   } else if (rd != rn) {
 652     assert(c == 0, "");
 653     mov(rd, rn); // need to generate at least one move!
 654   }
 655 #endif // AARCH64
 656 }
 657 
 658 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 659 #ifdef AARCH64
 660   if (c <= 0) {
 661     add_slow(rd, rn, -c);
 662     return;
 663   }
 664   if (c > right_n_bits(24)) {
 665     guarantee(rd != rn, "no large sub_slow with only one register");
 666     mov_slow(rd, c);
 667     sub(rd, rn, rd);
 668   } else {
 669     int lo = c & right_n_bits(12);
 670     int hi = (c >> 12) & right_n_bits(12);
 671     if (lo != 0) {
 672       sub(rd, rn, lo, lsl0);
 673     }
 674     if (hi != 0) {
 675       sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
 676     }
 677   }
 678 #else
  679   // This function is used in the compiler for handling large frame offsets
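        // For illustration: c == 0x1004 is emitted as
        //   sub(rd, rn, 0x004); sub(rd, rd, 0x1000);
        // while c == -8 folds into a single add(rd, rn, 8).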
 680   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 681     return add(rd, rn, (-c));
 682   }
 683   int low = c & 0x3fc;
 684   if (low != 0) {
 685     sub(rd, rn, low);
 686     rn = rd;
 687   }
 688   if (c & ~0x3fc) {
 689     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 690     sub(rd, rn, c & ~0x3fc);
 691   } else if (rd != rn) {
 692     assert(c == 0, "");
 693     mov(rd, rn); // need to generate at least one move!
 694   }
 695 #endif // AARCH64
 696 }
 697 
 698 void MacroAssembler::mov_slow(Register rd, address addr) {
  699   // do *not* call the non-relocated mov_related_address
 700   mov_slow(rd, (intptr_t)addr);
 701 }
 702 
 703 void MacroAssembler::mov_slow(Register rd, const char *str) {
 704   mov_slow(rd, (intptr_t)str);
 705 }
 706 
 707 #ifdef AARCH64
 708 
 709 // Common code for mov_slow and instr_count_for_mov_slow.
  710 // Returns the number of instructions in the mov_slow pattern,
  711 // generating them if a non-NULL MacroAssembler is given.
 712 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  713   // This code pattern is matched in NativeInstruction::is_mov_slow.
 714   // Update it at modifications.
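        //
        // For illustration: c == 0x0000123400005678 is not encodable as a single
        // movz/movn or as a logical immediate, so the code below emits
        //   movz(rd, 0x5678, 0); movk(rd, 0x1234, 32);   // 2 instructions
        // while c == 0xFFFFFFFFFFFF1234 collapses to a single movn(rd, 0xEDCB, 0).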
 715 
 716   const intx mask = right_n_bits(16);
 717   // 1 movz instruction
 718   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 719     if ((c & ~(mask << base_shift)) == 0) {
 720       if (masm != NULL) {
 721         masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
 722       }
 723       return 1;
 724     }
 725   }
 726   // 1 movn instruction
 727   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 728     if (((~c) & ~(mask << base_shift)) == 0) {
 729       if (masm != NULL) {
 730         masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
 731       }
 732       return 1;
 733     }
 734   }
 735   // 1 orr instruction
 736   {
 737     LogicalImmediate imm(c, false);
 738     if (imm.is_encoded()) {
 739       if (masm != NULL) {
 740         masm->orr(rd, ZR, imm);
 741       }
 742       return 1;
 743     }
 744   }
 745   // 1 movz/movn + up to 3 movk instructions
 746   int zeroes = 0;
 747   int ones = 0;
 748   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 749     int part = (c >> base_shift) & mask;
 750     if (part == 0) {
 751       ++zeroes;
 752     } else if (part == mask) {
 753       ++ones;
 754     }
 755   }
 756   int def_bits = 0;
 757   if (ones > zeroes) {
 758     def_bits = mask;
 759   }
 760   int inst_count = 0;
 761   for (int base_shift = 0; base_shift < 64; base_shift += 16) {
 762     int part = (c >> base_shift) & mask;
 763     if (part != def_bits) {
 764       if (masm != NULL) {
 765         if (inst_count > 0) {
 766           masm->movk(rd, part, base_shift);
 767         } else {
 768           if (def_bits == 0) {
 769             masm->movz(rd, part, base_shift);
 770           } else {
 771             masm->movn(rd, ~part & mask, base_shift);
 772           }
 773         }
 774       }
 775       inst_count++;
 776     }
 777   }
 778   assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
 779   return inst_count;
 780 }
 781 
 782 void MacroAssembler::mov_slow(Register rd, intptr_t c) {
 783 #ifdef ASSERT
 784   int off = offset();
 785 #endif
 786   (void) mov_slow_helper(rd, c, this);
 787   assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
 788 }
 789 
 790 // Counts instructions generated by mov_slow(rd, c).
 791 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
 792   return mov_slow_helper(noreg, c, NULL);
 793 }
 794 
 795 int MacroAssembler::instr_count_for_mov_slow(address c) {
 796   return mov_slow_helper(noreg, (intptr_t)c, NULL);
 797 }
 798 
 799 #else
 800 
 801 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 802   if (AsmOperand::is_rotated_imm(c)) {
 803     mov(rd, c, cond);
 804   } else if (AsmOperand::is_rotated_imm(~c)) {
 805     mvn(rd, ~c, cond);
 806   } else if (VM_Version::supports_movw()) {
 807     movw(rd, c & 0xffff, cond);
 808     if ((unsigned int)c >> 16) {
 809       movt(rd, (unsigned int)c >> 16, cond);
 810     }
 811   } else {
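          // No movw/movt available here. For illustration, c == 0x00FF00FF is
          // emitted by the decomposition below as
          //   mov(rd, 0x000000FF); orr(rd, rd, 0x00FF0000);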
 812     // Find first non-zero bit
 813     int shift = 0;
 814     while ((c & (3 << shift)) == 0) {
 815       shift += 2;
 816     }
 817     // Put the least significant part of the constant
 818     int mask = 0xff << shift;
 819     mov(rd, c & mask, cond);
 820     // Add up to 3 other parts of the constant;
 821     // each of them can be represented as rotated_imm
 822     if (c & (mask << 8)) {
 823       orr(rd, rd, c & (mask << 8), cond);
 824     }
 825     if (c & (mask << 16)) {
 826       orr(rd, rd, c & (mask << 16), cond);
 827     }
 828     if (c & (mask << 24)) {
 829       orr(rd, rd, c & (mask << 24), cond);
 830     }
 831   }
 832 }
 833 
 834 #endif // AARCH64
 835 
 836 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 837 #ifdef AARCH64
 838                              bool patchable
 839 #else
 840                              AsmCondition cond
 841 #endif
 842                              ) {
 843 
 844   if (o == NULL) {
 845 #ifdef AARCH64
 846     if (patchable) {
 847       nop();
 848     }
 849     mov(rd, ZR);
 850 #else
 851     mov(rd, 0, cond);
 852 #endif
 853     return;
 854   }
 855 
 856   if (oop_index == 0) {
 857     oop_index = oop_recorder()->allocate_oop_index(o);
 858   }
 859   relocate(oop_Relocation::spec(oop_index));
 860 
 861 #ifdef AARCH64
 862   if (patchable) {
 863     nop();
 864   }
 865   ldr(rd, pc());
 866 #else
 867   if (VM_Version::supports_movw()) {
 868     movw(rd, 0, cond);
 869     movt(rd, 0, cond);
 870   } else {
 871     ldr(rd, Address(PC), cond);
 872     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 873     nop();
 874   }
 875 #endif
 876 }
 877 
 878 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
 879   if (o == NULL) {
 880 #ifdef AARCH64
 881     if (patchable) {
 882       nop();
 883     }
 884 #endif
 885     mov(rd, 0);
 886     return;
 887   }
 888 
 889   if (metadata_index == 0) {
 890     metadata_index = oop_recorder()->allocate_metadata_index(o);
 891   }
 892   relocate(metadata_Relocation::spec(metadata_index));
 893 
 894 #ifdef AARCH64
 895   if (patchable) {
 896     nop();
 897   }
 898 #ifdef COMPILER2
 899   if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
 900     mov_slow(rd, (address)o);
 901     return;
 902   }
 903 #endif
 904   ldr(rd, pc());
 905 #else
 906   if (VM_Version::supports_movw()) {
 907     movw(rd, ((int)o) & 0xffff);
 908     movt(rd, (unsigned int)o >> 16);
 909   } else {
 910     ldr(rd, Address(PC));
 911     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 912     nop();
 913   }
 914 #endif // AARCH64
 915 }
 916 
 917 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
 918   Label skip_constant;
 919   union {
 920     jfloat f;
 921     jint i;
 922   } accessor;
 923   accessor.f = c;
 924 
 925 #ifdef AARCH64
 926   // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
 927   Label L;
 928   ldr_s(fd, target(L));
 929   b(skip_constant);
 930   bind(L);
 931   emit_int32(accessor.i);
 932   bind(skip_constant);
 933 #else
 934   flds(fd, Address(PC), cond);
 935   b(skip_constant);
 936   emit_int32(accessor.i);
 937   bind(skip_constant);
 938 #endif // AARCH64
 939 }
 940 
 941 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
 942   Label skip_constant;
 943   union {
 944     jdouble d;
 945     jint i[2];
 946   } accessor;
 947   accessor.d = c;
 948 
 949 #ifdef AARCH64
 950   // TODO-AARCH64 - try to optimize loading of double constants with fmov
 951   Label L;
 952   ldr_d(fd, target(L));
 953   b(skip_constant);
 954   align(wordSize);
 955   bind(L);
 956   emit_int32(accessor.i[0]);
 957   emit_int32(accessor.i[1]);
 958   bind(skip_constant);
 959 #else
 960   fldd(fd, Address(PC), cond);
 961   b(skip_constant);
 962   emit_int32(accessor.i[0]);
 963   emit_int32(accessor.i[1]);
 964   bind(skip_constant);
 965 #endif // AARCH64
 966 }
 967 
 968 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 969   intptr_t addr = (intptr_t) address_of_global;
 970 #ifdef AARCH64
 971   assert((addr & 0x3) == 0, "address should be aligned");
 972 
 973   // FIXME: TODO
 974   if (false && page_reachable_from_cache(address_of_global)) {
 975     assert(false,"TODO: relocate");
 976     //relocate();
 977     adrp(reg, address_of_global);
 978     ldrsw(reg, Address(reg, addr & 0xfff));
 979   } else {
 980     mov_slow(reg, addr & ~0x3fff);
 981     ldrsw(reg, Address(reg, addr & 0x3fff));
 982   }
 983 #else
 984   mov_slow(reg, addr & ~0xfff);
 985   ldr(reg, Address(reg, addr & 0xfff));
 986 #endif
 987 }
 988 
 989 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 990 #ifdef AARCH64
 991   intptr_t addr = (intptr_t) address_of_global;
 992   assert ((addr & 0x7) == 0, "address should be aligned");
 993   mov_slow(reg, addr & ~0x7fff);
 994   ldr(reg, Address(reg, addr & 0x7fff));
 995 #else
 996   ldr_global_s32(reg, address_of_global);
 997 #endif
 998 }
 999 
1000 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
1001   intptr_t addr = (intptr_t) address_of_global;
1002   mov_slow(reg, addr & ~0xfff);
1003   ldrb(reg, Address(reg, addr & 0xfff));
1004 }
1005 
1006 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1007 #ifdef AARCH64
1008   switch (bits) {
1009     case  8: uxtb(rd, rn); break;
1010     case 16: uxth(rd, rn); break;
1011     case 32: mov_w(rd, rn); break;
1012     default: ShouldNotReachHere();
1013   }
1014 #else
1015   if (bits <= 8) {
1016     andr(rd, rn, (1 << bits) - 1);
1017   } else if (bits >= 24) {
1018     bic(rd, rn, -1 << bits);
1019   } else {
1020     mov(rd, AsmOperand(rn, lsl, 32 - bits));
1021     mov(rd, AsmOperand(rd, lsr, 32 - bits));
1022   }
1023 #endif
1024 }
1025 
1026 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1027 #ifdef AARCH64
1028   switch (bits) {
1029     case  8: sxtb(rd, rn); break;
1030     case 16: sxth(rd, rn); break;
1031     case 32: sxtw(rd, rn); break;
1032     default: ShouldNotReachHere();
1033   }
1034 #else
1035   mov(rd, AsmOperand(rn, lsl, 32 - bits));
1036   mov(rd, AsmOperand(rd, asr, 32 - bits));
1037 #endif
1038 }
1039 
1040 #ifndef AARCH64
1041 
1042 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1043                                Register rn_lo, Register rn_hi,
1044                                AsmCondition cond) {
1045   if (rd_lo != rn_hi) {
1046     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1047     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1048   } else if (rd_hi != rn_lo) {
1049     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1050     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1051   } else {
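          // Here rd_lo == rn_hi and rd_hi == rn_lo, i.e. the two halves must be
          // swapped in place; with no scratch register available, use the classic
          // three-EOR swap.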
1052     eor(rd_lo, rd_hi, rd_lo, cond);
1053     eor(rd_hi, rd_lo, rd_hi, cond);
1054     eor(rd_lo, rd_hi, rd_lo, cond);
1055   }
1056 }
1057 
1058 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1059                                 Register rn_lo, Register rn_hi,
1060                                 AsmShift shift, Register count) {
1061   Register tmp;
1062   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1063     tmp = rd_lo;
1064   } else {
1065     tmp = rd_hi;
1066   }
1067   assert_different_registers(tmp, count, rn_lo, rn_hi);
1068 
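        // Informal sketch of the scheme below: the subs sets the flags from
        // count - 32. For count >= 32 (pl), the destination half that receives
        // bits from the other input word is taken entirely from that word shifted
        // by (count - 32); for count < 32 (mi), it is assembled from both input
        // halves, with tmp = 32 - count as the complementary shift amount.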
1069   subs(tmp, count, 32);
1070   if (shift == lsl) {
1071     assert_different_registers(rd_hi, rn_lo);
1072     assert_different_registers(count, rd_hi);
1073     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1074     rsb(tmp, count, 32, mi);
1075     if (rd_hi == rn_hi) {
1076       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1077       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1078     } else {
1079       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1080       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1081     }
1082     mov(rd_lo, AsmOperand(rn_lo, shift, count));
1083   } else {
1084     assert_different_registers(rd_lo, rn_hi);
1085     assert_different_registers(rd_lo, count);
1086     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1087     rsb(tmp, count, 32, mi);
1088     if (rd_lo == rn_lo) {
1089       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1090       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1091     } else {
1092       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1093       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1094     }
1095     mov(rd_hi, AsmOperand(rn_hi, shift, count));
1096   }
1097 }
1098 
1099 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1100                                 Register rn_lo, Register rn_hi,
1101                                 AsmShift shift, int count) {
1102   assert(count != 0 && (count & ~63) == 0, "must be");
1103 
1104   if (shift == lsl) {
1105     assert_different_registers(rd_hi, rn_lo);
1106     if (count >= 32) {
1107       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1108       mov(rd_lo, 0);
1109     } else {
1110       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1111       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1112       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1113     }
1114   } else {
1115     assert_different_registers(rd_lo, rn_hi);
1116     if (count >= 32) {
1117       if (count == 32) {
1118         mov(rd_lo, rn_hi);
1119       } else {
1120         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1121       }
1122       if (shift == asr) {
1123         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1124       } else {
1125         mov(rd_hi, 0);
1126       }
1127     } else {
1128       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1129       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1130       mov(rd_hi, AsmOperand(rn_hi, shift, count));
1131     }
1132   }
1133 }
1134 #endif // !AARCH64
1135 
1136 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 1137   // This code pattern is matched in NativeInstruction::skip_verify_oop.
1138   // Update it at modifications.
1139   if (!VerifyOops) return;
1140 
1141   char buffer[64];
1142 #ifdef COMPILER1
1143   if (CommentedAssembly) {
1144     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1145     block_comment(buffer);
1146   }
1147 #endif
1148   const char* msg_buffer = NULL;
1149   {
1150     ResourceMark rm;
1151     stringStream ss;
1152     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1153     msg_buffer = code_string(ss.as_string());
1154   }
1155 
1156   save_all_registers();
1157 
1158   if (reg != R2) {
1159       mov(R2, reg);                              // oop to verify
1160   }
1161   mov(R1, SP);                                   // register save area
1162 
1163   Label done;
1164   InlinedString Lmsg(msg_buffer);
1165   ldr_literal(R0, Lmsg);                         // message
1166 
1167   // call indirectly to solve generation ordering problem
1168   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1169   call(Rtemp);
1170 
1171   restore_all_registers();
1172 
1173   b(done);
1174 #ifdef COMPILER2
1175   int off = offset();
1176 #endif
1177   bind_literal(Lmsg);
1178 #ifdef COMPILER2
1179   if (offset() - off == 1 * wordSize) {
1180     // no padding, so insert nop for worst-case sizing
1181     nop();
1182   }
1183 #endif
1184   bind(done);
1185 }
1186 
1187 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1188   if (!VerifyOops) return;
1189 
1190   const char* msg_buffer = NULL;
1191   {
1192     ResourceMark rm;
1193     stringStream ss;
1194     if ((addr.base() == SP) && (addr.index()==noreg)) {
1195       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1196     } else {
1197       ss.print("verify_oop_addr: %s", s);
1198     }
1199     ss.print(" (%s:%d)", file, line);
1200     msg_buffer = code_string(ss.as_string());
1201   }
1202 
1203   int push_size = save_all_registers();
1204 
1205   if (addr.base() == SP) {
1206     // computes an addr that takes into account the push
1207     if (addr.index() != noreg) {
1208       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1209       add(new_base, SP, push_size);
1210       addr = addr.rebase(new_base);
1211     } else {
1212       addr = addr.plus_disp(push_size);
1213     }
1214   }
1215 
1216   ldr(R2, addr);                                 // oop to verify
1217   mov(R1, SP);                                   // register save area
1218 
1219   Label done;
1220   InlinedString Lmsg(msg_buffer);
1221   ldr_literal(R0, Lmsg);                         // message
1222 
1223   // call indirectly to solve generation ordering problem
1224   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1225   call(Rtemp);
1226 
1227   restore_all_registers();
1228 
1229   b(done);
1230   bind_literal(Lmsg);
1231   bind(done);
1232 }
1233 
1234 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1235   if (needs_explicit_null_check(offset)) {
1236 #ifdef AARCH64
1237     ldr(ZR, Address(reg));
1238 #else
1239     assert_different_registers(reg, tmp);
1240     if (tmp == noreg) {
1241       tmp = Rtemp;
1242       assert((! Thread::current()->is_Compiler_thread()) ||
1243              (! (ciEnv::current()->task() == NULL)) ||
1244              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1245              "Rtemp not available in C2"); // explicit tmp register required
1246       // XXX: could we mark the code buffer as not compatible with C2 ?
1247     }
1248     ldr(tmp, Address(reg));
1249 #endif
1250   }
1251 }
1252 
1253 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1254 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1255                                  RegisterOrConstant size_expression, Label& slow_case) {
1256   if (!Universe::heap()->supports_inline_contig_alloc()) {
1257     b(slow_case);
1258     return;
1259   }
1260 
1261   CollectedHeap* ch = Universe::heap();
1262 
1263   const Register top_addr = tmp1;
1264   const Register heap_end = tmp2;
1265 
1266   if (size_expression.is_register()) {
1267     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1268   } else {
1269     assert_different_registers(obj, obj_end, top_addr, heap_end);
1270   }
1271 
1272   bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1273   if (load_const) {
1274     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1275   } else {
1276     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1277   }
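        // What follows is a lock-free bump allocation: load the shared eden top,
        // compute the new top, check for arithmetic wrap-around and for the heap
        // end, then publish the new top with ldxr/stxr (AArch64) or
        // atomic_cas_bool (ARM32), retrying if another thread raced ahead.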
1278   // Calculate new heap_top by adding the size of the object
1279   Label retry;
1280   bind(retry);
1281 
1282 #ifdef AARCH64
1283   ldxr(obj, top_addr);
1284 #else
1285   ldr(obj, Address(top_addr));
1286 #endif // AARCH64
1287 
1288   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1289   add_rc(obj_end, obj, size_expression);
1290   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1291   cmp(obj_end, obj);
1292   b(slow_case, lo);
1293   // Update heap_top if allocation succeeded
1294   cmp(obj_end, heap_end);
1295   b(slow_case, hi);
1296 
1297 #ifdef AARCH64
1298   stxr(heap_end/*scratched*/, obj_end, top_addr);
1299   cbnz_w(heap_end, retry);
1300 #else
1301   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1302   b(retry, ne);
1303 #endif // AARCH64
1304 }
1305 
1306 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1307 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1308                                  RegisterOrConstant size_expression, Label& slow_case) {
1309   const Register tlab_end = tmp1;
1310   assert_different_registers(obj, obj_end, tlab_end);
1311 
1312   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1313   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1314   add_rc(obj_end, obj, size_expression);
1315   cmp(obj_end, tlab_end);
1316   b(slow_case, hi);
1317   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1318 }
1319 
 1320 // Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
1321 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1322   Label loop;
1323   const Register ptr = start;
1324 
1325 #ifdef AARCH64
1326   // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1327   const Register size = tmp;
1328   Label remaining, done;
1329 
1330   sub(size, end, start);
1331 
1332 #ifdef ASSERT
1333   { Label L;
1334     tst(size, wordSize - 1);
1335     b(L, eq);
1336     stop("size is not a multiple of wordSize");
1337     bind(L);
1338   }
1339 #endif // ASSERT
1340 
1341   subs(size, size, wordSize);
1342   b(remaining, le);
1343 
1344   // Zero by 2 words per iteration.
1345   bind(loop);
1346   subs(size, size, 2*wordSize);
1347   stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1348   b(loop, gt);
1349 
1350   bind(remaining);
1351   b(done, ne);
1352   str(ZR, Address(ptr));
1353   bind(done);
1354 #else
1355   mov(tmp, 0);
1356   bind(loop);
1357   cmp(ptr, end);
1358   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1359   b(loop, lo);
1360 #endif // AARCH64
1361 }
1362 
1363 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1364 #ifdef AARCH64
1365   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1366   add_rc(tmp, tmp, size_in_bytes);
1367   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1368 #else
1369   // Bump total bytes allocated by this thread
1370   Label done;
1371 
1372   ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1373   adds(tmp, tmp, size_in_bytes);
1374   str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1375   b(done, cc);
1376 
 1377   // The 32-bit addition overflowed, so redo the update on the full 64-bit counter and store it
 1378   // single-copy atomically with strd (an unlikely scenario on typical embedded systems, as it means more than 4GB has been allocated).
 1379   // ldrd/strd require an even-odd register pair; such a pair can be hard to reserve at a higher level, so the routine allocates and saves one itself.
1380   Register low, high;
 1381   // Select either R0/R1 or R2/R3
1382 
1383   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1384     low = R2;
1385     high  = R3;
1386   } else {
1387     low = R0;
1388     high  = R1;
1389   }
1390   push(RegisterSet(low, high));
1391 
1392   ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1393   adds(low, low, size_in_bytes);
1394   adc(high, high, 0);
1395   strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1396 
1397   pop(RegisterSet(low, high));
1398 
1399   bind(done);
1400 #endif // AARCH64
1401 }
1402 
1403 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1404   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1405   if (UseStackBanging) {
1406     const int page_size = os::vm_page_size();
1407 
1408     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1409     strb(R0, Address(tmp));
1410 #ifdef AARCH64
1411     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1412       sub(tmp, tmp, page_size);
1413       strb(R0, Address(tmp));
1414     }
1415 #else
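          // The ARM32 bang step is 0xff0 rather than a full page, presumably so
          // that the offset fits strb's 12-bit immediate while staying below the
          // (typically 4 KB) page size; every page of the frame is still touched.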
1416     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1417       strb(R0, Address(tmp, -0xff0, pre_indexed));
1418     }
1419 #endif // AARCH64
1420   }
1421 }
1422 
1423 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1424   if (UseStackBanging) {
1425     Label loop;
1426 
1427     mov(tmp, SP);
1428     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1429 #ifdef AARCH64
1430     sub(tmp, tmp, Rsize);
1431     bind(loop);
1432     subs(Rsize, Rsize, os::vm_page_size());
1433     strb(ZR, Address(tmp, Rsize));
1434 #else
1435     bind(loop);
1436     subs(Rsize, Rsize, 0xff0);
1437     strb(R0, Address(tmp, -0xff0, pre_indexed));
1438 #endif // AARCH64
1439     b(loop, hi);
1440   }
1441 }
1442 
1443 void MacroAssembler::stop(const char* msg) {
 1444   // This code pattern is matched in NativeInstruction::is_stop.
1445   // Update it at modifications.
1446 #ifdef COMPILER1
1447   if (CommentedAssembly) {
1448     block_comment("stop");
1449   }
1450 #endif
1451 
1452   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1453   InlinedString Lmsg(msg);
1454 
1455   // save all registers for further inspection
1456   save_all_registers();
1457 
1458   ldr_literal(R0, Lmsg);                     // message
1459   mov(R1, SP);                               // register save area
1460 
1461 #ifdef AARCH64
1462   ldr_literal(Rtemp, Ldebug);
1463   br(Rtemp);
1464 #else
1465   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1466 #endif // AARCH64
1467 
1468 #if defined(COMPILER2) && defined(AARCH64)
1469   int off = offset();
1470 #endif
1471   bind_literal(Lmsg);
1472   bind_literal(Ldebug);
1473 #if defined(COMPILER2) && defined(AARCH64)
1474   if (offset() - off == 2 * wordSize) {
1475     // no padding, so insert nop for worst-case sizing
1476     nop();
1477   }
1478 #endif
1479 }
1480 
1481 void MacroAssembler::warn(const char* msg) {
1482 #ifdef COMPILER1
1483   if (CommentedAssembly) {
1484     block_comment("warn");
1485   }
1486 #endif
1487 
1488   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1489   InlinedString Lmsg(msg);
1490   Label done;
1491 
1492   int push_size = save_caller_save_registers();
1493 
1494 #ifdef AARCH64
1495   // TODO-AARCH64 - get rid of extra debug parameters
1496   mov(R1, LR);
1497   mov(R2, FP);
1498   add(R3, SP, push_size);
1499 #endif
1500 
1501   ldr_literal(R0, Lmsg);                    // message
1502   ldr_literal(LR, Lwarn);                   // call warning
1503 
1504   call(LR);
1505 
1506   restore_caller_save_registers();
1507 
1508   b(done);
1509   bind_literal(Lmsg);
1510   bind_literal(Lwarn);
1511   bind(done);
1512 }
1513 
1514 
1515 int MacroAssembler::save_all_registers() {
 1516   // This code pattern is matched in NativeInstruction::is_save_all_registers.
1517   // Update it at modifications.
1518 #ifdef AARCH64
1519   const Register tmp = Rtemp;
1520   raw_push(R30, ZR);
1521   for (int i = 28; i >= 0; i -= 2) {
1522       raw_push(as_Register(i), as_Register(i+1));
1523   }
1524   mov_pc_to(tmp);
1525   str(tmp, Address(SP, 31*wordSize));
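        // Rtemp was clobbered by mov_pc_to above; reload its original value from
        // its slot in the save area (register i was pushed at offset i*wordSize).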
1526   ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1527   return 32*wordSize;
1528 #else
1529   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1530   return 15*wordSize;
1531 #endif // AARCH64
1532 }
1533 
1534 void MacroAssembler::restore_all_registers() {
1535 #ifdef AARCH64
1536   for (int i = 0; i <= 28; i += 2) {
1537     raw_pop(as_Register(i), as_Register(i+1));
1538   }
1539   raw_pop(R30, ZR);
1540 #else
1541   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1542   add(SP, SP, wordSize);                         // discard saved PC
1543 #endif // AARCH64
1544 }
1545 
1546 int MacroAssembler::save_caller_save_registers() {
1547 #ifdef AARCH64
1548   for (int i = 0; i <= 16; i += 2) {
1549     raw_push(as_Register(i), as_Register(i+1));
1550   }
1551   raw_push(R18, LR);
1552   return 20*wordSize;
1553 #else
1554 #if R9_IS_SCRATCHED
1555   // Save also R10 to preserve alignment
1556   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1557   return 8*wordSize;
1558 #else
1559   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1560   return 6*wordSize;
1561 #endif
1562 #endif // AARCH64
1563 }
1564 
1565 void MacroAssembler::restore_caller_save_registers() {
1566 #ifdef AARCH64
1567   raw_pop(R18, LR);
1568   for (int i = 16; i >= 0; i -= 2) {
1569     raw_pop(as_Register(i), as_Register(i+1));
1570   }
1571 #else
1572 #if R9_IS_SCRATCHED
1573   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1574 #else
1575   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1576 #endif
1577 #endif // AARCH64
1578 }
1579 
1580 void MacroAssembler::debug(const char* msg, const intx* registers) {
 1581   // In order to get locks to work, we need to fake an in_VM state
1582   JavaThread* thread = JavaThread::current();
1583   thread->set_thread_state(_thread_in_vm);
1584 
1585   if (ShowMessageBoxOnError) {
1586     ttyLocker ttyl;
1587     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1588       BytecodeCounter::print();
1589     }
1590     if (os::message_box(msg, "Execution stopped, print registers?")) {
1591 #ifdef AARCH64
1592       // saved registers: R0-R30, PC
1593       const int nregs = 32;
1594 #else
1595       // saved registers: R0-R12, LR, PC
1596       const int nregs = 15;
1597       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1598 #endif // AARCH64
1599 
1600       for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1601         tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1602       }
1603 
1604 #ifdef AARCH64
1605       tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1606 #endif // AARCH64
1607 
1608       // derive the original SP value from the address of the register save area
1609       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1610     }
1611     BREAKPOINT;
1612   } else {
1613     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1614   }
1615   assert(false, "DEBUG MESSAGE: %s", msg);
1616   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1617 }
1618 
1619 void MacroAssembler::unimplemented(const char* what) {
1620   const char* buf = NULL;
1621   {
1622     ResourceMark rm;
1623     stringStream ss;
1624     ss.print("unimplemented: %s", what);
1625     buf = code_string(ss.as_string());
1626   }
1627   stop(buf);
1628 }
1629 
1630 
1631 // Implementation of FixedSizeCodeBlock
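//
// Illustrative usage (a sketch, not taken from this file): reserve exactly N instruction
// slots and let the destructor pad any unused slots with NOPs:
//
//   {
//     FixedSizeCodeBlock block(masm, 4 /* size_in_instrs */, true /* enabled */);
//     ... emit at most 4 instructions here ...
//   } // ~FixedSizeCodeBlock() pads the remainder with NOPs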
1632 
1633 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1634 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1635 }
1636 
1637 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1638   if (_enabled) {
1639     address curr_pc = _masm->pc();
1640 
1641     assert(_start < curr_pc, "invalid current pc");
1642     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1643 
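    // Pad the block to its fixed size: the number of NOPs is _size_in_instrs minus the
    // number of instructions already emitted, i.e. _size_in_instrs - (curr_pc - _start) / InstructionSize.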
1644     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1645     for (int i = 0; i < nops_count; i++) {
1646       _masm->nop();
1647     }
1648   }
1649 }
1650 
1651 #ifdef AARCH64
1652 
1653 // Serializes memory.
1654 // The tmp register is not used on AArch64; the parameter is provided solely for compatibility with 32-bit ARM
1655 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1656   if (!os::is_MP()) return;
1657 
1658   // TODO-AARCH64 investigate dsb vs dmb effects
1659   if (order_constraint == StoreStore) {
1660     dmb(DMB_st);
1661   } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1662     dmb(DMB_ld);
1663   } else {
1664     dmb(DMB_all);
1665   }
1666 }
1667 
1668 #else
1669 
1670 // Serializes memory. Potentially blows flags and reg.
1671 // tmp is a scratch for the v6 co-processor write op (could be noreg for other architecture versions)
1672 // preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve the status flags. Optional.
1673 // load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load operation and the conditional branch. Optional.
1674 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1675                             Register tmp,
1676                             bool preserve_flags,
1677                             Register load_tgt) {
1678   if (!os::is_MP()) return;
1679 
1680   if (order_constraint == StoreStore) {
1681     dmb(DMB_st, tmp);
1682   } else if ((order_constraint & StoreLoad)  ||
1683              (order_constraint & LoadLoad)   ||
1684              (order_constraint & StoreStore) ||
1685              (load_tgt == noreg)             ||
1686              preserve_flags) {
1687     dmb(DMB_all, tmp);
1688   } else {
1689     // LoadStore: reordering of speculative stores is prohibited
1690 
1691     // By providing an ordered load target register, we avoid an extra memory load reference
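    // The conditional branch below depends on the value just loaded into load_tgt, so
    // later stores cannot become visible before that load completes (load ->
    // control dependency -> store), which gives the required LoadStore ordering
    // without a dmb.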
1692     Label not_taken;
1693     bind(not_taken);
1694     cmp(load_tgt, load_tgt);
1695     b(not_taken, ne);
1696   }
1697 }
1698 
1699 #endif // AARCH64
1700 
1701 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1702 // on failure, so fall-through can only mean success.
1703 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1704 // This is only needed for C2, which for some reason does not retry,
1705 // while C1 and the interpreter do.
1706 // TODO: measure if it makes a difference
1707 
1708 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1709   Register base, Register tmp, Label &slow_case,
1710   bool allow_fallthrough_on_failure, bool one_shot)
1711 {
1712 
1713   bool fallthrough_is_success = false;
1714 
1715   // ARM Litmus Test example does prefetching here.
1716   // TODO: investigate if it helps performance
1717 
1718   // The last store was to the displaced header, so to prevent
1719   // reordering we must issue a StoreStore or Release barrier before
1720   // the CAS store.
1721 
1722 #ifdef AARCH64
1723 
1724   Register Rscratch = tmp;
1725   Register Roop = base;
1726   Register mark = oldval;
1727   Register Rbox = newval;
1728   Label loop;
1729 
1730   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1731 
1732   // Instead of StoreStore here, we use store-release-exclusive below
1733 
1734   bind(loop);
1735 
1736   ldaxr(tmp, base);  // acquire
1737   cmp(tmp, oldval);
1738   b(slow_case, ne);
1739   stlxr(tmp, newval, base); // release
1740   if (one_shot) {
1741     cmp_w(tmp, 0);
1742   } else {
1743     cbnz_w(tmp, loop);
1744     fallthrough_is_success = true;
1745   }
1746 
1747   // MemBarAcquireLock would normally go here, but
1748   // we already do ldaxr+stlxr above, which has
1749   // Sequential Consistency
1750 
1751 #else
1752   membar(MacroAssembler::StoreStore, noreg);
1753 
1754   if (one_shot) {
1755     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1756     cmp(tmp, oldval);
1757     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1758     cmp(tmp, 0, eq);
1759   } else {
1760     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1761   }
1762 
1763   // MemBarAcquireLock barrier
1764   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1765   // but that doesn't prevent a load or store from floating up between
1766   // the load and store in the CAS sequence, so play it safe and
1767   // do a full fence.
1768   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1769 #endif
1770   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1771     b(slow_case, ne);
1772   }
1773 }
1774 
1775 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1776   Register base, Register tmp, Label &slow_case,
1777   bool allow_fallthrough_on_failure, bool one_shot)
1778 {
1779 
1780   bool fallthrough_is_success = false;
1781 
1782   assert_different_registers(oldval,newval,base,tmp);
1783 
1784 #ifdef AARCH64
1785   Label loop;
1786 
1787   assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1788 
1789   bind(loop);
1790   ldxr(tmp, base);
1791   cmp(tmp, oldval);
1792   b(slow_case, ne);
1793   // MemBarReleaseLock barrier
1794   stlxr(tmp, newval, base);
1795   if (one_shot) {
1796     cmp_w(tmp, 0);
1797   } else {
1798     cbnz_w(tmp, loop);
1799     fallthrough_is_success = true;
1800   }
1801 #else
1802   // MemBarReleaseLock barrier
1803   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1804   // but that doesn't prevent a load or store from floating down between
1805   // the load and store in the CAS sequence, so play it safe and
1806   // do a full fence.
1807   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1808 
1809   if (one_shot) {
1810     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1811     cmp(tmp, oldval);
1812     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1813     cmp(tmp, 0, eq);
1814   } else {
1815     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1816   }
1817 #endif
1818   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1819     b(slow_case, ne);
1820   }
1821 
1822   // ExitEnter
1823   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1824   // barrier that follows volatile store.
1825   // TODO: Should be able to remove on armv8 if volatile loads
1826   // use the load-acquire instruction.
1827   membar(StoreLoad, noreg);
1828 }
1829 
1830 #ifndef PRODUCT
1831 
1832 // Preserves flags and all registers.
1833 // On SMP the updated value might not be visible to external observers without a synchronization barrier
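// On 32-bit ARM the CPSR (including the condition flags) is saved to Rtemp before the CAS
// loop and restored with msr afterwards; on AArch64 the ldxr/stxr loop does not touch NZCV.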
1834 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1835   if (counter_addr != NULL) {
1836     InlinedAddress counter_addr_literal((address)counter_addr);
1837     Label done, retry;
1838     if (cond != al) {
1839       b(done, inverse(cond));
1840     }
1841 
1842 #ifdef AARCH64
1843     raw_push(R0, R1);
1844     raw_push(R2, ZR);
1845 
1846     ldr_literal(R0, counter_addr_literal);
1847 
1848     bind(retry);
1849     ldxr_w(R1, R0);
1850     add_w(R1, R1, 1);
1851     stxr_w(R2, R1, R0);
1852     cbnz_w(R2, retry);
1853 
1854     raw_pop(R2, ZR);
1855     raw_pop(R0, R1);
1856 #else
1857     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1858     ldr_literal(R0, counter_addr_literal);
1859 
1860     mrs(CPSR, Rtemp);
1861 
1862     bind(retry);
1863     ldr_s32(R1, Address(R0));
1864     add(R2, R1, 1);
1865     atomic_cas_bool(R1, R2, R0, 0, R3);
1866     b(retry, ne);
1867 
1868     msr(CPSR_fsxc, Rtemp);
1869 
1870     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1871 #endif // AARCH64
1872 
1873     b(done);
1874     bind_literal(counter_addr_literal);
1875 
1876     bind(done);
1877   }
1878 }
1879 
1880 #endif // !PRODUCT
1881 
1882 
1883 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1884 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1885 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1886                                                  Register tmp, Label& slow_case, int* counter_addr) {
1887 
1888   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1889 #ifdef ASSERT
1890   breakpoint(ne); // Fallthrough only on success
1891 #endif
1892 #ifndef PRODUCT
1893   if (counter_addr != NULL) {
1894     cond_atomic_inc32(al, counter_addr);
1895   }
1896 #endif // !PRODUCT
1897 }
1898 
1899 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1900                                          bool swap_reg_contains_mark,
1901                                          Register tmp2,
1902                                          Label& done, Label& slow_case,
1903                                          BiasedLockingCounters* counters) {
1904   // obj_reg must be preserved (at least) if the bias locking fails
1905   // tmp_reg is a temporary register
1906   // swap_reg was used as a temporary but contained a value
1907   //   that was used afterwards in some call paths. Callers
1908   //   have been fixed so that swap_reg no longer needs to be
1909   //   saved.
1910   // Rtemp is no longer scratched
1911 
1912   assert(UseBiasedLocking, "why call this otherwise?");
1913   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1914   guarantee(swap_reg!=tmp_reg, "invariant");
1915   assert(tmp_reg != noreg, "must supply tmp_reg");
1916 
1917 #ifndef PRODUCT
1918   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1919     counters = BiasedLocking::counters();
1920   }
1921 #endif
1922 
1923   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1924   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1925 
1926   // Biased locking
1927   // See whether the lock is currently biased toward our thread and
1928   // whether the epoch is still valid
1929   // Note that the runtime guarantees sufficient alignment of JavaThread
1930   // pointers to allow age to be placed into low bits
1931   // First check to see whether biasing is even enabled for this object
1932   Label cas_label;
1933 
1934   // The null check applies to the mark load, if we need to perform it.
1935   // If the mark has already been loaded into swap_reg then the null check
1936   // has already been performed and the offset is irrelevant.
1937   int null_check_offset = offset();
1938   if (!swap_reg_contains_mark) {
1939     ldr(swap_reg, mark_addr);
1940   }
1941 
1942   // On MP platforms, loads could return 'stale' values in some cases.
1943   // That is acceptable since either CAS or slow case path is taken in the worst case.
1944 
1945   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1946   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1947 
1948   b(cas_label, ne);
1949 
1950   // The bias pattern is present in the object's header. Need to check
1951   // whether the bias owner and the epoch are both still current.
1952   load_klass(tmp_reg, obj_reg);
1953   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1954   orr(tmp_reg, tmp_reg, Rthread);
1955   eor(tmp_reg, tmp_reg, swap_reg);
1956 
1957 #ifdef AARCH64
1958   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1959 #else
1960   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1961 #endif // AARCH64
1962 
1963 #ifndef PRODUCT
1964   if (counters != NULL) {
1965     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1966   }
1967 #endif // !PRODUCT
1968 
1969   b(done, eq);
1970 
1971   Label try_revoke_bias;
1972   Label try_rebias;
1973 
1974   // At this point we know that the header has the bias pattern and
1975   // that we are not the bias owner in the current epoch. We need to
1976   // figure out more details about the state of the header in order to
1977   // know what operations can be legally performed on the object's
1978   // header.
1979 
1980   // If the low three bits in the xor result aren't clear, that means
1981   // the prototype header is no longer biased and we have to revoke
1982   // the bias on this object.
1983   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1984   b(try_revoke_bias, ne);
1985 
1986   // Biasing is still enabled for this data type. See whether the
1987   // epoch of the current bias is still valid, meaning that the epoch
1988   // bits of the mark word are equal to the epoch bits of the
1989   // prototype header. (Note that the prototype header's epoch bits
1990   // only change at a safepoint.) If not, attempt to rebias the object
1991   // toward the current thread. Note that we must be absolutely sure
1992   // that the current epoch is invalid in order to do this because
1993   // otherwise the manipulations it performs on the mark word are
1994   // illegal.
1995   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1996   b(try_rebias, ne);
1997 
1998   // tmp_reg has the age, epoch and pattern bits cleared
1999   // The remaining (owner) bits are (Thread ^ current_owner)
2000 
2001   // The epoch of the current bias is still valid but we know nothing
2002   // about the owner; it might be set or it might be clear. Try to
2003   // acquire the bias of the object using an atomic operation. If this
2004   // fails we will go in to the runtime to revoke the object's bias.
2005   // Note that we first construct the presumed unbiased header so we
2006   // don't accidentally blow away another thread's valid bias.
2007 
2008   // Note that we know the owner is not ourself. Hence, success can
2009   // only happen when the owner bits are 0
2010 
2011 #ifdef AARCH64
2012   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2013   // a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
2014   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2015   andr(swap_reg, swap_reg, tmp2);
2016 #else
2017   // until the assembler can be made smarter, we need to make some assumptions about the values
2018   // so we can optimize this:
2019   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2020 
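  // The lsl-by-23 / lsr-by-23 pair below keeps only the low 9 bits (the 0x1ff mask
  // asserted above, i.e. biased_lock | age | epoch) and clears the thread (owner) bits.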
2021   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2022   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2023 #endif // AARCH64
2024 
2025   orr(tmp_reg, swap_reg, Rthread); // new mark
2026 
2027   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2028         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2029 
2030   // If the biasing toward our thread failed, this means that
2031   // another thread succeeded in biasing it toward itself and we
2032   // need to revoke that bias. The revocation will occur in the
2033   // interpreter runtime in the slow case.
2034 
2035   b(done);
2036 
2037   bind(try_rebias);
2038 
2039   // At this point we know the epoch has expired, meaning that the
2040   // current "bias owner", if any, is actually invalid. Under these
2041   // circumstances _only_, we are allowed to use the current header's
2042   // value as the comparison value when doing the cas to acquire the
2043   // bias in the current epoch. In other words, we allow transfer of
2044   // the bias from one thread to another directly in this situation.
2045 
2046   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2047 
2048   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2049 
2050   // owner bits 'random'. Set them to Rthread.
2051 #ifdef AARCH64
2052   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2053   andr(tmp_reg, tmp_reg, tmp2);
2054 #else
2055   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2056   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2057 #endif // AARCH64
2058 
2059   orr(tmp_reg, tmp_reg, Rthread); // new mark
2060 
2061   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2062         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2063 
2064   // If the biasing toward our thread failed, then another thread
2065   // succeeded in biasing it toward itself and we need to revoke that
2066   // bias. The revocation will occur in the runtime in the slow case.
2067 
2068   b(done);
2069 
2070   bind(try_revoke_bias);
2071 
2072   // The prototype mark in the klass doesn't have the bias bit set any
2073   // more, indicating that objects of this data type are not supposed
2074   // to be biased any more. We are going to try to reset the mark of
2075   // this object to the prototype value and fall through to the
2076   // CAS-based locking scheme. Note that if our CAS fails, it means
2077   // that another thread raced us for the privilege of revoking the
2078   // bias of this particular object, so it's okay to continue in the
2079   // normal locking code.
2080 
2081   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2082 
2083   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2084 
2085   // owner bits 'random'. Clear them
2086 #ifdef AARCH64
2087   mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2088   andr(tmp_reg, tmp_reg, tmp2);
2089 #else
2090   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2091   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2092 #endif // AARCH64
2093 
2094   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2095         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2096 
2097   // Fall through to the normal CAS-based lock, because no matter what
2098   // the result of the above CAS, some thread must have succeeded in
2099   // removing the bias bit from the object's header.
2100 
2101   bind(cas_label);
2102 
2103   return null_check_offset;
2104 }
2105 
2106 
2107 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2108   assert(UseBiasedLocking, "why call this otherwise?");
2109 
2110   // Check for biased locking unlock case, which is a no-op
2111   // Note: we do not have to check the thread ID for two reasons.
2112   // First, the interpreter checks for IllegalMonitorStateException at
2113   // a higher level. Second, if the bias was revoked while we held the
2114   // lock, the object could not be rebiased toward another thread, so
2115   // the bias bit would be clear.
2116   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2117 
2118   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2119   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2120   b(done, eq);
2121 }
2122 
2123 
2124 void MacroAssembler::resolve_jobject(Register value,
2125                                      Register tmp1,
2126                                      Register tmp2) {
2127   assert_different_registers(value, tmp1, tmp2);
2128   Label done, not_weak;
2129   cbz(value, done);             // Use NULL as-is.
2130   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2131   tbz(value, 0, not_weak);      // Test for jweak tag.
2132   // Resolve jweak.
2133   ldr(value, Address(value, -JNIHandles::weak_tag_value));
2134   verify_oop(value);
2135 #if INCLUDE_ALL_GCS
2136   if (UseG1GC) {
2137     g1_write_barrier_pre(noreg, // store_addr
2138                          noreg, // new_val
2139                          value, // pre_val
2140                          tmp1,  // tmp1
2141                          tmp2); // tmp2
2142     }
2143 #endif // INCLUDE_ALL_GCS
2144   b(done);
2145   bind(not_weak);
2146   // Resolve (untagged) jobject.
2147   ldr(value, Address(value));
2148   verify_oop(value);
2149   bind(done);
2150 }
2151 
2152 
2153 //////////////////////////////////////////////////////////////////////////////////
2154 
2155 #if INCLUDE_ALL_GCS
2156 
2157 // G1 pre-barrier.
2158 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2159 // If store_addr != noreg, then the previous value is loaded from [store_addr];
2160 // in that case the store_addr and new_val registers are preserved;
2161 // otherwise the pre_val register is preserved.
2162 void MacroAssembler::g1_write_barrier_pre(Register store_addr,
2163                                           Register new_val,
2164                                           Register pre_val,
2165                                           Register tmp1,
2166                                           Register tmp2) {
2167   Label done;
2168   Label runtime;
2169 
2170   if (store_addr != noreg) {
2171     assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
2172   } else {
2173     assert (new_val == noreg, "should be");
2174     assert_different_registers(pre_val, tmp1, tmp2, noreg);
2175   }
2176 
2177   Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2178                                         SATBMarkQueue::byte_offset_of_active()));
2179   Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2180                                   SATBMarkQueue::byte_offset_of_index()));
2181   Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2182                                    SATBMarkQueue::byte_offset_of_buf()));
2183 
2184   // Is marking active?
2185   assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
2186   ldrb(tmp1, in_progress);
2187   cbz(tmp1, done);
2188 
2189   // Do we need to load the previous value?
2190   if (store_addr != noreg) {
2191     load_heap_oop(pre_val, Address(store_addr, 0));
2192   }
2193 
2194   // Is the previous value null?
2195   cbz(pre_val, done);
2196 
2197   // Can we store original value in the thread's buffer?
2198   // Is index == 0?
2199   // (The index field is typed as size_t.)
2200 
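  // The queue index counts down in bytes; decrementing it by wordSize and taking the
  // runtime path when the result would be negative is equivalent to the index == 0 check.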
2201   ldr(tmp1, index);           // tmp1 := *index_adr
2202   ldr(tmp2, buffer);
2203 
2204   subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
2205   b(runtime, lt);             // If negative, goto runtime
2206 
2207   str(tmp1, index);           // *index_adr := tmp1
2208 
2209   // Record the previous value
2210   str(pre_val, Address(tmp2, tmp1));
2211   b(done);
2212 
2213   bind(runtime);
2214 
2215   // save the live input values
2216 #ifdef AARCH64
2217   if (store_addr != noreg) {
2218     raw_push(store_addr, new_val);
2219   } else {
2220     raw_push(pre_val, ZR);
2221   }
2222 #else
2223   if (store_addr != noreg) {
2224     // avoid raw_push to support any ordering of store_addr and new_val
2225     push(RegisterSet(store_addr) | RegisterSet(new_val));
2226   } else {
2227     push(pre_val);
2228   }
2229 #endif // AARCH64
2230 
2231   if (pre_val != R0) {
2232     mov(R0, pre_val);
2233   }
2234   mov(R1, Rthread);
2235 
2236   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
2237 
2238 #ifdef AARCH64
2239   if (store_addr != noreg) {
2240     raw_pop(store_addr, new_val);
2241   } else {
2242     raw_pop(pre_val, ZR);
2243   }
2244 #else
2245   if (store_addr != noreg) {
2246     pop(RegisterSet(store_addr) | RegisterSet(new_val));
2247   } else {
2248     pop(pre_val);
2249   }
2250 #endif // AARCH64
2251 
2252   bind(done);
2253 }
2254 
2255 // G1 post-barrier.
2256 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2257 void MacroAssembler::g1_write_barrier_post(Register store_addr,
2258                                            Register new_val,
2259                                            Register tmp1,
2260                                            Register tmp2,
2261                                            Register tmp3) {
2262 
2263   Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2264                                         DirtyCardQueue::byte_offset_of_index()));
2265   Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2266                                    DirtyCardQueue::byte_offset_of_buf()));
2267 
2268   BarrierSet* bs = Universe::heap()->barrier_set();
2269   CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
2270   CardTable* ct = ctbs->card_table();
2271   Label done;
2272   Label runtime;
2273 
2274   // Does store cross heap regions?
2275 
2276   eor(tmp1, store_addr, new_val);
2277 #ifdef AARCH64
2278   logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
2279   cbz(tmp1, done);
2280 #else
2281   movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
2282   b(done, eq);
2283 #endif
2284 
2285   // crosses regions, storing NULL?
2286 
2287   cbz(new_val, done);
2288 
2289   // storing region crossing non-NULL, is card already dirty?
2290   const Register card_addr = tmp1;
2291   assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
2292 
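  // card_addr = byte_map_base + (store_addr >> card_shift), i.e. the card table entry
  // covering the address being stored to.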
2293   mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
2294   add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));
2295 
2296   ldrb(tmp2, Address(card_addr));
2297   cmp(tmp2, (int)G1CardTable::g1_young_card_val());
2298   b(done, eq);
2299 
2300   membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
2301 
2302   assert(CardTable::dirty_card_val() == 0, "adjust this code");
2303   ldrb(tmp2, Address(card_addr));
2304   cbz(tmp2, done);
2305 
2306   // storing a region crossing, non-NULL oop, card is clean.
2307   // dirty card and log.
2308 
2309   strb(zero_register(tmp2), Address(card_addr));
2310 
2311   ldr(tmp2, queue_index);
2312   ldr(tmp3, buffer);
2313 
2314   subs(tmp2, tmp2, wordSize);
2315   b(runtime, lt); // go to runtime if now negative
2316 
2317   str(tmp2, queue_index);
2318 
2319   str(card_addr, Address(tmp3, tmp2));
2320   b(done);
2321 
2322   bind(runtime);
2323 
2324   if (card_addr != R0) {
2325     mov(R0, card_addr);
2326   }
2327   mov(R1, Rthread);
2328   call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
2329 
2330   bind(done);
2331 }
2332 
2333 #endif // INCLUDE_ALL_GCS
2334 
2335 //////////////////////////////////////////////////////////////////////////////////
2336 
2337 #ifdef AARCH64
2338 
2339 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2340   switch (size_in_bytes) {
2341     case  8: ldr(dst, src); break;
2342     case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2343     case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2344     case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2345     default: ShouldNotReachHere();
2346   }
2347 }
2348 
2349 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2350   switch (size_in_bytes) {
2351     case  8: str(src, dst);    break;
2352     case  4: str_32(src, dst); break;
2353     case  2: strh(src, dst);   break;
2354     case  1: strb(src, dst);   break;
2355     default: ShouldNotReachHere();
2356   }
2357 }
2358 
2359 #else
2360 
2361 void MacroAssembler::load_sized_value(Register dst, Address src,
2362                                     size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2363   switch (size_in_bytes) {
2364     case  4: ldr(dst, src, cond); break;
2365     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2366     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2367     default: ShouldNotReachHere();
2368   }
2369 }
2370 
2371 
2372 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2373   switch (size_in_bytes) {
2374     case  4: str(src, dst, cond); break;
2375     case  2: strh(src, dst, cond);   break;
2376     case  1: strb(src, dst, cond);   break;
2377     default: ShouldNotReachHere();
2378   }
2379 }
2380 #endif // AARCH64
2381 
2382 // Look up the method for a megamorphic invokeinterface call.
2383 // The target method is determined by <Rinterf, Rindex>.
2384 // The receiver klass is in Rklass.
2385 // On success, the result will be in method_result, and execution falls through.
2386 // On failure, execution transfers to the given label.
2387 void MacroAssembler::lookup_interface_method(Register Rklass,
2388                                              Register Rintf,
2389                                              RegisterOrConstant itable_index,
2390                                              Register method_result,
2391                                              Register Rscan,
2392                                              Register Rtmp,
2393                                              Label& L_no_such_interface) {
2394 
2395   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
2396 
2397   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
2398   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
2399 
2400   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2401   const int base = in_bytes(Klass::vtable_start_offset());
2402   const int scale = exact_log2(vtableEntry::size_in_bytes());
2403   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2404   add(Rscan, Rklass, base);
2405   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
2406 
2407   // Search through the itable for an interface equal to incoming Rintf
2408   // itable looks like [intface][offset][intface][offset][intface][offset]
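  // and is terminated by a NULL interface entry. Rscan is post-incremented by entry_size
  // on each iteration, so on a match it points one entry past the matching one.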
2409 
2410   Label loop;
2411   bind(loop);
2412   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
2413 #ifdef AARCH64
2414   Label found;
2415   cmp(Rtmp, Rintf);
2416   b(found, eq);
2417   cbnz(Rtmp, loop);
2418 #else
2419   cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
2420   cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
2421   b(loop, ne);
2422 #endif // AARCH64
2423 
2424 #ifdef AARCH64
2425   b(L_no_such_interface);
2426   bind(found);
2427 #else
2428   // CF == 0 means we reached the end of the itable without finding the interface klass (icklass)
2429   b(L_no_such_interface, cc);
2430 #endif // !AARCH64
2431 
2432   if (method_result != noreg) {
2433     // Interface found at previous position of Rscan, now load the method
2434     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
2435     if (itable_index.is_register()) {
2436       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
2437       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2438       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2439       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
2440     } else {
2441       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
2442                           itableMethodEntry::method_offset_in_bytes();
2443       add_slow(method_result, Rklass, method_offset);
2444       ldr(method_result, Address(method_result, Rtmp));
2445     }
2446   }
2447 }
2448 
2449 #ifdef COMPILER2
2450 // TODO: 8 bytes at a time? pre-fetch?
2451 // Compare char[] arrays aligned to 4 bytes.
2452 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2453                                         Register limit, Register result,
2454                                       Register chr1, Register chr2, Label& Ldone) {
2455   Label Lvector, Lloop;
2456 
2457   // Note: limit contains the number of bytes (2 * number of char elements) and is != 0.
2458   tst(limit, 0x2); // trailing character ?
2459   b(Lvector, eq);
2460 
2461   // compare the trailing char
2462   sub(limit, limit, sizeof(jchar));
2463   ldrh(chr1, Address(ary1, limit));
2464   ldrh(chr2, Address(ary2, limit));
2465   cmp(chr1, chr2);
2466   mov(result, 0, ne);     // not equal
2467   b(Ldone, ne);
2468 
2469   // only one char ?
2470   tst(limit, limit);
2471   mov(result, 1, eq);
2472   b(Ldone, eq);
2473 
2474   // word-by-word compare, don't need an alignment check
2475   bind(Lvector);
2476 
2477   // Shift ary1 and ary2 to the end of the arrays, negate limit
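  // so the loop below can walk both arrays with a single negative offset that increases
  // towards zero, using the flags from 'adds' to detect the end.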
2478   add(ary1, limit, ary1);
2479   add(ary2, limit, ary2);
2480   neg(limit, limit);
2481 
2482   bind(Lloop);
2483   ldr_u32(chr1, Address(ary1, limit));
2484   ldr_u32(chr2, Address(ary2, limit));
2485   cmp_32(chr1, chr2);
2486   mov(result, 0, ne);     // not equal
2487   b(Ldone, ne);
2488   adds(limit, limit, 2*sizeof(jchar));
2489   b(Lloop, ne);
2490 
2491   // Caller should set it:
2492   // mov(result_reg, 1);  //equal
2493 }
2494 #endif
2495 
2496 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2497   mov_slow(tmpreg1, counter_addr);
2498   ldr_s32(tmpreg2, tmpreg1);
2499   add_32(tmpreg2, tmpreg2, 1);
2500   str_32(tmpreg2, tmpreg1);
2501 }
2502 
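// Produces an integer comparison result in dst from a preceding floating-point compare:
// +1 for 'greater', 0 for 'equal', -1 for 'less' or unordered (a description of the code
// below, not a documented contract).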
2503 void MacroAssembler::floating_cmp(Register dst) {
2504 #ifdef AARCH64
2505   NOT_TESTED();
2506   cset(dst, gt);            // 1 if '>', else 0
2507   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2508 #else
2509   vmrs(dst, FPSCR);
2510   orr(dst, dst, 0x08000000);
2511   eor(dst, dst, AsmOperand(dst, lsl, 3));
2512   mov(dst, AsmOperand(dst, asr, 30));
2513 #endif
2514 }
2515 
2516 void MacroAssembler::restore_default_fp_mode() {
2517 #ifdef AARCH64
2518   msr(SysReg_FPCR, ZR);
2519 #else
2520 #ifndef __SOFTFP__
2521   // Round to Near mode, IEEE compatible, masked exceptions
2522   mov(Rtemp, 0);
2523   vmsr(FPSCR, Rtemp);
2524 #endif // !__SOFTFP__
2525 #endif // AARCH64
2526 }
2527 
2528 #ifndef AARCH64
2529 // 24-bit word range == 26-bit byte range
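// An illustrative check (hypothetical values): 0x1FFFFFC (just under +32MB, word-aligned)
// survives the encode/decode round trip below, while 0x2000000 does not, so branch
// offsets are limited to a signed 26-bit byte range in multiples of 4.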
2530 bool check26(int offset) {
2531   // this could be simplified, but it mimics encoding and decoding
2532   // an actual branch instruction
2533   int off1 = offset << 6 >> 8;
2534   int encoded = off1 & ((1<<24)-1);
2535   int decoded = encoded << 8 >> 6;
2536   return offset == decoded;
2537 }
2538 #endif // !AARCH64
2539 
2540 // Perform some slight adjustments so the default 32MB code cache
2541 // is fully reachable.
2542 static inline address first_cache_address() {
2543   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2544 }
2545 static inline address last_cache_address() {
2546   return CodeCache::high_bound() - Assembler::InstructionSize;
2547 }
2548 
2549 #ifdef AARCH64
2550 // Can we reach target using ADRP?
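// ADRP encodes a signed 21-bit page offset in units of 4KB pages (roughly +/-4GB), so the
// target's page must be within that distance of both ends of the code cache.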
2551 bool MacroAssembler::page_reachable_from_cache(address target) {
2552   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2553   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2554   intptr_t addr = (intptr_t)target & ~0xfff;
2555 
2556   intptr_t loffset = addr - cl;
2557   intptr_t hoffset = addr - ch;
2558   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2559 }
2560 #endif
2561 
2562 // Can we reach target using unconditional branch or call from anywhere
2563 // in the code cache (because code can be relocated)?
2564 bool MacroAssembler::_reachable_from_cache(address target) {
2565 #ifdef __thumb__
2566   if ((1 & (intptr_t)target) != 0) {
2567     // Return false to avoid 'b' if we need switching to THUMB mode.
2568     return false;
2569   }
2570 #endif
2571 
2572   address cl = first_cache_address();
2573   address ch = last_cache_address();
2574 
2575   if (ForceUnreachable) {
2576     // Only addresses from CodeCache can be treated as reachable.
2577     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2578       return false;
2579     }
2580   }
2581 
2582   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2583   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2584 
2585 #ifdef AARCH64
2586   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2587 #else
2588   return check26(loffset - 8) && check26(hoffset - 8);
2589 #endif
2590 }
2591 
2592 bool MacroAssembler::reachable_from_cache(address target) {
2593   assert(CodeCache::contains(pc()), "not supported");
2594   return _reachable_from_cache(target);
2595 }
2596 
2597 // Can we reach the entire code cache from anywhere else in the code cache?
2598 bool MacroAssembler::_cache_fully_reachable() {
2599   address cl = first_cache_address();
2600   address ch = last_cache_address();
2601   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2602 }
2603 
2604 bool MacroAssembler::cache_fully_reachable() {
2605   assert(CodeCache::contains(pc()), "not supported");
2606   return _cache_fully_reachable();
2607 }
2608 
2609 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2610   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2611   if (reachable_from_cache(target)) {
2612     relocate(rtype);
2613     b(target NOT_AARCH64_ARG(cond));
2614     return;
2615   }
2616 
2617   // Note: relocate is not needed for the code below,
2618   // encoding targets in absolute format.
2619   if (ignore_non_patchable_relocations()) {
2620     rtype = relocInfo::none;
2621   }
2622 
2623 #ifdef AARCH64
2624   assert (scratch != noreg, "should be specified");
2625   InlinedAddress address_literal(target, rtype);
2626   ldr_literal(scratch, address_literal);
2627   br(scratch);
2628   int off = offset();
2629   bind_literal(address_literal);
2630 #ifdef COMPILER2
2631   if (offset() - off == wordSize) {
2632     // no padding, so insert nop for worst-case sizing
2633     nop();
2634   }
2635 #endif
2636 #else
2637   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2638     // Note: this version cannot be (atomically) patched
2639     mov_slow(scratch, (intptr_t)target, cond);
2640     bx(scratch, cond);
2641   } else {
2642     Label skip;
2643     InlinedAddress address_literal(target);
2644     if (cond != al) {
2645       b(skip, inverse(cond));
2646     }
2647     relocate(rtype);
2648     ldr_literal(PC, address_literal);
2649     bind_literal(address_literal);
2650     bind(skip);
2651   }
2652 #endif // AARCH64
2653 }
2654 
2655 // Similar to jump except that:
2656 // - near calls are valid only if any destination in the cache is near
2657 // - no movt/movw (not atomically patchable)
2658 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2659   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2660   if (cache_fully_reachable()) {
2661     // Note: this assumes that all possible targets (the initial one
2662     // and the addresses patched to) are all in the code cache.
2663     assert(CodeCache::contains(target), "target might be too far");
2664     relocate(rtype);
2665     b(target NOT_AARCH64_ARG(cond));
2666     return;
2667   }
2668 
2669   // Discard the relocation information if not needed for CacheCompiledCode
2670   // since the next encodings are all in absolute format.
2671   if (ignore_non_patchable_relocations()) {
2672     rtype = relocInfo::none;
2673   }
2674 
2675 #ifdef AARCH64
2676   assert (scratch != noreg, "should be specified");
2677   InlinedAddress address_literal(target);
2678   relocate(rtype);
2679   ldr_literal(scratch, address_literal);
2680   br(scratch);
2681   int off = offset();
2682   bind_literal(address_literal);
2683 #ifdef COMPILER2
2684   if (offset() - off == wordSize) {
2685     // no padding, so insert nop for worst-case sizing
2686     nop();
2687   }
2688 #endif
2689 #else
2690   {
2691     Label skip;
2692     InlinedAddress address_literal(target);
2693     if (cond != al) {
2694       b(skip, inverse(cond));
2695     }
2696     relocate(rtype);
2697     ldr_literal(PC, address_literal);
2698     bind_literal(address_literal);
2699     bind(skip);
2700   }
2701 #endif // AARCH64
2702 }
2703 
2704 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2705   Register scratch = LR;
2706   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2707   if (reachable_from_cache(target)) {
2708     relocate(rspec);
2709     bl(target NOT_AARCH64_ARG(cond));
2710     return;
2711   }
2712 
2713   // Note: relocate is not needed for the code below,
2714   // encoding targets in absolute format.
2715   if (ignore_non_patchable_relocations()) {
2716     // This assumes the information was needed only for relocating the code.
2717     rspec = RelocationHolder::none;
2718   }
2719 
2720 #ifndef AARCH64
2721   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2722     // Note: this version cannot be (atomically) patched
2723     mov_slow(scratch, (intptr_t)target, cond);
2724     blx(scratch, cond);
2725     return;
2726   }
2727 #endif
2728 
2729   {
2730     Label ret_addr;
2731 #ifndef AARCH64
2732     if (cond != al) {
2733       b(ret_addr, inverse(cond));
2734     }
2735 #endif
2736 
2737 
2738 #ifdef AARCH64
2739     // TODO-AARCH64: make more optimal implementation
2740     // [ Keep in sync with MacroAssembler::call_size ]
2741     assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2742     mov_slow(scratch, target);
2743     blr(scratch);
2744 #else
2745     InlinedAddress address_literal(target);
2746     relocate(rspec);
2747     adr(LR, ret_addr);
2748     ldr_literal(PC, address_literal);
2749 
2750     bind_literal(address_literal);
2751     bind(ret_addr);
2752 #endif
2753   }
2754 }
2755 
2756 #if defined(AARCH64) && defined(COMPILER2)
2757 int MacroAssembler::call_size(address target, bool far, bool patchable) {
2758   // FIXME: mov_slow is variable-length
2759   if (!far) return 1; // bl
2760   if (patchable) return 2;  // ldr; blr
2761   return instr_count_for_mov_slow((intptr_t)target) + 1;
2762 }
2763 #endif
2764 
2765 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2766   assert(rspec.type() == relocInfo::static_call_type ||
2767          rspec.type() == relocInfo::none ||
2768          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2769 
2770   // Always generate the relocation information, needed for patching
2771   relocate(rspec); // used by NativeCall::is_call_before()
2772   if (cache_fully_reachable()) {
2773     // Note: this assumes that all possible targets (the initial one
2774     // and the addresses patched to) are all in the code cache.
2775     assert(CodeCache::contains(target), "target might be too far");
2776     bl(target);
2777   } else {
2778 #if defined(AARCH64) && defined(COMPILER2)
2779     if (c2) {
2780       // return address needs to match call_size().
2781       // no need to trash Rtemp
2782       int off = offset();
2783       Label skip_literal;
2784       InlinedAddress address_literal(target);
2785       ldr_literal(LR, address_literal);
2786       blr(LR);
2787       int ret_addr_offset = offset();
2788       assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2789       b(skip_literal);
2790       int off2 = offset();
2791       bind_literal(address_literal);
2792       if (offset() - off2 == wordSize) {
2793         // no padding, so insert nop for worst-case sizing
2794         nop();
2795       }
2796       bind(skip_literal);
2797       return ret_addr_offset;
2798     }
2799 #endif
2800     Label ret_addr;
2801     InlinedAddress address_literal(target);
2802 #ifdef AARCH64
2803     ldr_literal(Rtemp, address_literal);
2804     adr(LR, ret_addr);
2805     br(Rtemp);
2806 #else
2807     adr(LR, ret_addr);
2808     ldr_literal(PC, address_literal);
2809 #endif
2810     bind_literal(address_literal);
2811     bind(ret_addr);
2812   }
2813   return offset();
2814 }
2815 
2816 // ((OopHandle)result).resolve();
2817 void MacroAssembler::resolve_oop_handle(Register result) {
2818   // OopHandle::resolve is an indirection.
2819   ldr(result, Address(result, 0));
2820 }
2821 
2822 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2823   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2824   ldr(tmp, Address(method, Method::const_offset()));
2825   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2826   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2827   ldr(mirror, Address(tmp, mirror_offset));
2828   resolve_oop_handle(mirror);
2829 }
2830 
2831 
2832 ///////////////////////////////////////////////////////////////////////////////
2833 
2834 // Compressed pointers
2835 
2836 #ifdef AARCH64
2837 
2838 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2839   if (UseCompressedClassPointers) {
2840     ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2841     decode_klass_not_null(dst_klass);
2842   } else {
2843     ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2844   }
2845 }
2846 
2847 #else
2848 
2849 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2850   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2851 }
2852 
2853 #endif // AARCH64
2854 
2855 // Blows src_klass.
2856 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2857 #ifdef AARCH64
2858   if (UseCompressedClassPointers) {
2859     assert(src_klass != dst_oop, "not enough registers");
2860     encode_klass_not_null(src_klass);
2861     str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2862     return;
2863   }
2864 #endif // AARCH64
2865   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2866 }
2867 
2868 #ifdef AARCH64
2869 
2870 void MacroAssembler::store_klass_gap(Register dst) {
2871   if (UseCompressedClassPointers) {
2872     str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2873   }
2874 }
2875 
2876 #endif // AARCH64
2877 
2878 
2879 void MacroAssembler::load_heap_oop(Register dst, Address src) {
2880 #ifdef AARCH64
2881   if (UseCompressedOops) {
2882     ldr_w(dst, src);
2883     decode_heap_oop(dst);
2884     return;
2885   }
2886 #endif // AARCH64
2887   ldr(dst, src);
2888 }
2889 
2890 // Blows src and flags.
2891 void MacroAssembler::store_heap_oop(Register src, Address dst) {
2892 #ifdef AARCH64
2893   if (UseCompressedOops) {
2894     assert(!dst.uses(src), "not enough registers");
2895     encode_heap_oop(src);
2896     str_w(src, dst);
2897     return;
2898   }
2899 #endif // AARCH64
2900   str(src, dst);
2901 }
2902 
2903 void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
2904 #ifdef AARCH64
2905   if (UseCompressedOops) {
2906     str_w(src, dst);
2907     return;
2908   }
2909 #endif // AARCH64
2910   str(src, dst);
2911 }
2912 
2913 
2914 #ifdef AARCH64
2915 
2916 // Algorithm must match oop.inline.hpp encode_heap_oop.
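// Roughly (illustrative): narrow = (src == NULL) ? 0 : (src - narrow_oop_base) >> narrow_oop_shift,
// with the base subtraction / shift omitted when the base is NULL or the shift is zero.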
2917 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2918   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2919   // Update it when modifying this code.
2920   assert (UseCompressedOops, "must be compressed");
2921   assert (Universe::heap() != NULL, "java heap should be initialized");
2922 #ifdef ASSERT
2923   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2924 #endif
2925   verify_oop(src);
2926   if (Universe::narrow_oop_base() == NULL) {
2927     if (Universe::narrow_oop_shift() != 0) {
2928       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2929       _lsr(dst, src, Universe::narrow_oop_shift());
2930     } else if (dst != src) {
2931       mov(dst, src);
2932     }
2933   } else {
2934     tst(src, src);
2935     csel(dst, Rheap_base, src, eq);
2936     sub(dst, dst, Rheap_base);
2937     if (Universe::narrow_oop_shift() != 0) {
2938       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2939       _lsr(dst, dst, Universe::narrow_oop_shift());
2940     }
2941   }
2942 }
2943 
2944 // Same algorithm as oop.inline.hpp decode_heap_oop.
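// Roughly (illustrative): oop = (narrow == 0) ? NULL : narrow_oop_base + ((uintptr_t)narrow << narrow_oop_shift),
// or just a shift when there is no base.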
2945 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2946 #ifdef ASSERT
2947   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2948 #endif
2949   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2950   if (Universe::narrow_oop_base() != NULL) {
2951     tst(src, src);
2952     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2953     csel(dst, dst, ZR, ne);
2954   } else {
2955     _lsl(dst, src, Universe::narrow_oop_shift());
2956   }
2957   verify_oop(dst);
2958 }
2959 
2960 #ifdef COMPILER2
2961 // Algorithm must match oop.inline.hpp encode_heap_oop.
2962 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2963 // must be changed.
2964 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2965   assert (UseCompressedOops, "must be compressed");
2966   assert (Universe::heap() != NULL, "java heap should be initialized");
2967 #ifdef ASSERT
2968   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2969 #endif
2970   verify_oop(src);
2971   if (Universe::narrow_oop_base() == NULL) {
2972     if (Universe::narrow_oop_shift() != 0) {
2973       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2974       _lsr(dst, src, Universe::narrow_oop_shift());
2975     } else if (dst != src) {
2976           mov(dst, src);
2977     }
2978   } else {
2979     sub(dst, src, Rheap_base);
2980     if (Universe::narrow_oop_shift() != 0) {
2981       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2982       _lsr(dst, dst, Universe::narrow_oop_shift());
2983     }
2984   }
2985 }
2986 
2987 // Same algorithm as oops.inline.hpp decode_heap_oop.
2988 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2989 // must be changed.
2990 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2991 #ifdef ASSERT
2992   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2993 #endif
2994   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2995   if (Universe::narrow_oop_base() != NULL) {
2996     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2997   } else {
2998     _lsl(dst, src, Universe::narrow_oop_shift());
2999   }
3000   verify_oop(dst);
3001 }
3002 
3003 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3004   assert(UseCompressedClassPointers, "should only be used for compressed header");
3005   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3006   int klass_index = oop_recorder()->find_index(k);
3007   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
3008 
3009   // Relocation with special format (see relocInfo_arm.hpp).
3010   relocate(rspec);
3011   narrowKlass encoded_k = Klass::encode_klass(k);
3012   movz(dst, encoded_k & 0xffff, 0);
3013   movk(dst, (encoded_k >> 16) & 0xffff, 16);
3014 }
3015 
3016 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3017   assert(UseCompressedOops, "should only be used for compressed header");
3018   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3019   int oop_index = oop_recorder()->find_index(obj);
3020   RelocationHolder rspec = oop_Relocation::spec(oop_index);
3021 
3022   relocate(rspec);
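  // The 0xffff immediates below are placeholders; the actual narrow oop value is expected
  // to be patched in through the oop relocation recorded above (an assumption; compare
  // with set_narrow_klass, which emits the real encoded value directly).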
3023   movz(dst, 0xffff, 0);
3024   movk(dst, 0xffff, 16);
3025 }
3026 
3027 #endif // COMPILER2
3028 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3029 // must be changed.
3030 void MacroAssembler::encode_klass_not_null(Register r) {
3031   if (Universe::narrow_klass_base() != NULL) {
3032     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3033     assert(r != Rheap_base, "Encoding a klass in Rheap_base");
3034     mov_slow(Rheap_base, Universe::narrow_klass_base());
3035     sub(r, r, Rheap_base);
3036   }
3037   if (Universe::narrow_klass_shift() != 0) {
3038     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3039     _lsr(r, r, Universe::narrow_klass_shift());
3040   }
3041   if (Universe::narrow_klass_base() != NULL) {
3042     reinit_heapbase();
3043   }
3044 }
3045 
3046 // Must preserve condition codes, or C2 encodeKlass_not_null rule
3047 // must be changed.
3048 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3049   if (dst == src) {
3050     encode_klass_not_null(src);
3051     return;
3052   }
3053   if (Universe::narrow_klass_base() != NULL) {
3054     mov_slow(dst, (int64_t)Universe::narrow_klass_base());
3055     sub(dst, src, dst);
3056     if (Universe::narrow_klass_shift() != 0) {
3057       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3058       _lsr(dst, dst, Universe::narrow_klass_shift());
3059     }
3060   } else {
3061     if (Universe::narrow_klass_shift() != 0) {
3062       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3063       _lsr(dst, src, Universe::narrow_klass_shift());
3064     } else {
3065       mov(dst, src);
3066     }
3067   }
3068 }
3069 
3070 // Function instr_count_for_decode_klass_not_null() counts the instructions
3071 // generated by decode_klass_not_null(register r) and reinit_heapbase(),
3072 // when (Universe::heap() != NULL).  Hence, if the instructions they
3073 // generate change, then this method needs to be updated.
3074 int MacroAssembler::instr_count_for_decode_klass_not_null() {
3075   assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3076   assert(Universe::heap() != NULL, "java heap should be initialized");
3077   if (Universe::narrow_klass_base() != NULL) {
3078     return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
3079       1 +                                                                 // add
3080       instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
3081   } else {
3082     if (Universe::narrow_klass_shift() != 0) {
3083       return 1;
3084     }
3085   }
3086   return 0;
3087 }
3088 
3089 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3090 // must be changed.
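     // Decodes a narrow klass in place: r = narrow_klass_base + (r << narrow_klass_shift).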
3091 void MacroAssembler::decode_klass_not_null(Register r) {
3092   int off = offset();
3093   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3094   assert(Universe::heap() != NULL, "java heap should be initialized");
3095   assert(r != Rheap_base, "Decoding a klass in Rheap_base");
3096   // Do not emit extra checking code here: instr_count_for_decode_klass_not_null() counts the instructions generated below.
3097   // Also do not verify_oop as this is called by verify_oop.
3098   if (Universe::narrow_klass_base() != NULL) {
3099     // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3100     mov_slow(Rheap_base, Universe::narrow_klass_base());
3101     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
3102     reinit_heapbase();
3103   } else {
3104     if (Universe::narrow_klass_shift() != 0) {
3105       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3106       _lsl(r, r, Universe::narrow_klass_shift());
3107     }
3108   }
3109   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
3110 }
3111 
3112 // Must preserve condition codes, or C2 decodeKlass_not_null rule
3113 // must be changed.
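     // Two-register variant: dst serves as the scratch for the base, so Rheap_base
     // is not clobbered and no reinit_heapbase() is required.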
3114 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3115   if (src == dst) {
3116     decode_klass_not_null(src);
3117     return;
3118   }
3119 
3120   assert(UseCompressedClassPointers, "should only be used for compressed headers");
3121   assert(Universe::heap() != NULL, "java heap should be initialized");
3122   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
3123   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
3124   // Also do not verify_oop as this is called by verify_oop.
3125   if (Universe::narrow_klass_base() != NULL) {
3126     mov_slow(dst, Universe::narrow_klass_base());
3127     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
3128   } else {
3129     _lsl(dst, src, Universe::narrow_klass_shift());
3130   }
3131 }
3132 
3133 
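     // Reloads Rheap_base with the narrow pointers base after it has been used as a
     // scratch register. Before the Java heap is initialized the base value is not
     // known yet, so it is loaded indirectly through narrow_ptrs_base_addr().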
3134 void MacroAssembler::reinit_heapbase() {
3135   if (UseCompressedOops || UseCompressedClassPointers) {
3136     if (Universe::heap() != NULL) {
3137       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
3138     } else {
3139       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
3140     }
3141   }
3142 }
3143 
3144 #ifdef ASSERT
3145 void MacroAssembler::verify_heapbase(const char* msg) {
3146   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
3147   // Keep that in sync when modifying this code.
3148   assert (UseCompressedOops, "should be compressed");
3149   assert (Universe::heap() != NULL, "java heap should be initialized");
3150   if (CheckCompressedOops) {
3151     Label ok;
3152     str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
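         // Save Rtemp and the condition flags (NZCV) so the check does not disturb
         // the caller's state; both are restored below.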
3153     raw_push(Rtemp, ZR);
3154     mrs(Rtemp, Assembler::SysReg_NZCV);
3155     str(Rtemp, Address(SP, 1 * wordSize));
3156     mov_slow(Rtemp, Universe::narrow_ptrs_base());
3157     cmp(Rheap_base, Rtemp);
3158     b(ok, eq);
3159     stop(msg);
3160     bind(ok);
3161     ldr(Rtemp, Address(SP, 1 * wordSize));
3162     msr(Assembler::SysReg_NZCV, Rtemp);
3163     raw_pop(Rtemp, ZR);
3164     str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
3165   }
3166 }
3167 #endif // ASSERT
3168 
3169 #endif // AARCH64
3170 
3171 #ifdef COMPILER2
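     // C2 fast-path monitor enter: try biased locking first (when enabled), then the
     // stack-locking fast path: if the mark word is unlocked, CAS the displaced mark
     // word into the object header; otherwise check for a recursive stack lock.
     // On exit the condition flags indicate success (eq) or failure (ne).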
3172 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3173 {
3174   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3175 
3176   Register Rmark      = Rscratch2;
3177 
3178   assert(Roop != Rscratch, "");
3179   assert(Roop != Rmark, "");
3180   assert(Rbox != Rscratch, "");
3181   assert(Rbox != Rmark, "");
3182 
3183   Label fast_lock, done;
3184 
3185   if (UseBiasedLocking && !UseOptoBiasInlining) {
3186     Label failed;
3187 #ifdef AARCH64
3188     biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
3189 #else
3190     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
3191 #endif
3192     bind(failed);
3193   }
3194 
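       // The lock bits of the mark word show whether the object is unlocked
       // (markOopDesc::unlocked_value set); if so, go straight to the CAS attempt.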
3195   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
3196   tst(Rmark, markOopDesc::unlocked_value);
3197   b(fast_lock, ne);
3198 
3199   // Check for recursive lock
3200   // See comments in InterpreterMacroAssembler::lock_object for
3201   // explanations on the fast recursive locking check.
3202 #ifdef AARCH64
3203   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3204   Assembler::LogicalImmediate imm(mask, false);
3205   mov(Rscratch, SP);
3206   sub(Rscratch, Rmark, Rscratch);
3207   ands(Rscratch, Rscratch, imm);
3208   b(done, ne); // exit with failure
3209   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
3210   b(done);
3211 
3212 #else
3213   // -1- test low 2 bits
3214   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3215   // -2- test (hdr - SP) if the low two bits are 0
3216   sub(Rscratch, Rmark, SP, eq);
3217   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3218   // If still 'eq' then recursive locking OK
3219   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
3220   b(done);
3221 #endif
3222 
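       // Unlocked case: record the current mark word as the displaced header in the
       // BasicLock, then try to CAS the BasicLock address into the object header.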
3223   bind(fast_lock);
3224   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3225 
3226   bool allow_fallthrough_on_failure = true;
3227   bool one_shot = true;
3228   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3229 
3230   bind(done);
3231 
3232 }
3233 
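     // C2 fast-path monitor exit: undo biased locking if needed, then load the
     // displaced header from the BasicLock. A NULL displaced header means a
     // recursive stack lock with nothing to undo; otherwise CAS the displaced
     // header back into the object header. The condition flags report success
     // (eq) or failure (ne).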
3234 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2  AARCH64_ONLY_ARG(Register Rscratch3))
3235 {
3236   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3237 
3238   Register Rmark      = Rscratch2;
3239 
3240   assert(Roop != Rscratch, "");
3241   assert(Roop != Rmark, "");
3242   assert(Rbox != Rscratch, "");
3243   assert(Rbox != Rmark, "");
3244 
3245   Label done;
3246 
3247   if (UseBiasedLocking && !UseOptoBiasInlining) {
3248     biased_locking_exit(Roop, Rscratch, done);
3249   }
3250 
3251   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3252   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3253   cmp(Rmark, 0);
3254   b(done, eq);
3255 
3256   // Restore the object header
3257   bool allow_fallthrough_on_failure = true;
3258   bool one_shot = true;
3259   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3260 
3261   bind(done);
3262 
3263 }
3264 #endif // COMPILER2