1 /*
   2  * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "ci/ciEnv.hpp"
  30 #include "code/nativeInst.hpp"
  31 #include "compiler/disassembler.hpp"
  32 #include "gc/shared/barrierSet.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "gc/shared/cardTableBarrierSet.hpp"
  36 #include "gc/shared/collectedHeap.inline.hpp"
  37 #include "interpreter/interpreter.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/accessDecorators.hpp"
  40 #include "oops/klass.inline.hpp"
  41 #include "prims/methodHandles.hpp"
  42 #include "runtime/biasedLocking.hpp"
  43 #include "runtime/interfaceSupport.inline.hpp"
  44 #include "runtime/objectMonitor.hpp"
  45 #include "runtime/os.hpp"
  46 #include "runtime/sharedRuntime.hpp"
  47 #include "runtime/stubRoutines.hpp"
  48 #include "utilities/macros.hpp"
  49 
  50 // Implementation of AddressLiteral
  51 
  52 void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  53   switch (rtype) {
  54   case relocInfo::oop_type:
  55     // Oops are a special case. Normally they would be their own section
  56     // but in cases like icBuffer they are literals in the code stream that
  57     // we don't have a section for. We use none so that we get a literal address
  58     // which is always patchable.
  59     break;
  60   case relocInfo::external_word_type:
  61     _rspec = external_word_Relocation::spec(_target);
  62     break;
  63   case relocInfo::internal_word_type:
  64     _rspec = internal_word_Relocation::spec(_target);
  65     break;
  66   case relocInfo::opt_virtual_call_type:
  67     _rspec = opt_virtual_call_Relocation::spec();
  68     break;
  69   case relocInfo::static_call_type:
  70     _rspec = static_call_Relocation::spec();
  71     break;
  72   case relocInfo::runtime_call_type:
  73     _rspec = runtime_call_Relocation::spec();
  74     break;
  75   case relocInfo::poll_type:
  76   case relocInfo::poll_return_type:
  77     _rspec = Relocation::spec_simple(rtype);
  78     break;
  79   case relocInfo::none:
  80     break;
  81   default:
  82     ShouldNotReachHere();
  83     break;
  84   }
  85 }
  86 
  87 // Initially added to the Assembler interface as a pure virtual:
  88 //   RegisterConstant delayed_value(..)
  89 // for:
  90 //   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
  91 // This was subsequently modified to its present name and return type.
  92 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
  93                                                       Register tmp,
  94                                                       int offset) {
  95   ShouldNotReachHere();
  96   return RegisterOrConstant(-1);
  97 }
  98 
  99 
 100 
 101 
 102 // virtual method calling
 103 void MacroAssembler::lookup_virtual_method(Register recv_klass,
 104                                            Register vtable_index,
 105                                            Register method_result) {
 106   const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
 107   assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
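       // Scale vtable_index by wordSize (one word per vtable entry, per the assert above),
       // then load the Method* from the entry at vtable_start + index * wordSize + method_offset.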
 108   add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
 109   ldr(method_result, Address(recv_klass, base_offset));
 110 }
 111 
 112 
 113 // Simplified, combined version, good for typical uses.
 114 // Falls through on failure.
 115 void MacroAssembler::check_klass_subtype(Register sub_klass,
 116                                          Register super_klass,
 117                                          Register temp_reg,
 118                                          Register temp_reg2,
 119                                          Register temp_reg3,
 120                                          Label& L_success) {
 121   Label L_failure;
 122   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
 123   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
 124   bind(L_failure);
 125 }
 126 
 127 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
 128                                                    Register super_klass,
 129                                                    Register temp_reg,
 130                                                    Register temp_reg2,
 131                                                    Label* L_success,
 132                                                    Label* L_failure,
 133                                                    Label* L_slow_path) {
 134 
 135   assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
 136   const Register super_check_offset = temp_reg2;
 137 
 138   Label L_fallthrough;
 139   int label_nulls = 0;
 140   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 141   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 142   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
 143   assert(label_nulls <= 1, "at most one NULL in the batch");
 144 
 145   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 146   int sco_offset = in_bytes(Klass::super_check_offset_offset());
 147   Address super_check_offset_addr(super_klass, sco_offset);
 148 
 149   // If the pointers are equal, we are done (e.g., String[] elements).
 150   // This self-check enables sharing of secondary supertype arrays among
 151   // non-primary types such as array-of-interface.  Otherwise, each such
 152   // type would need its own customized secondary supertype array (SSA).
 153   // We move this check to the front of the fast path because many
 154   // type checks are in fact trivially successful in this manner,
 155   // so we get a nicely predicted branch right at the start of the check.
 156   cmp(sub_klass, super_klass);
 157   b(*L_success, eq);
 158 
 159   // Check the supertype display:
 160   ldr_u32(super_check_offset, super_check_offset_addr);
 161 
 162   Address super_check_addr(sub_klass, super_check_offset);
 163   ldr(temp_reg, super_check_addr); // load displayed supertype
 164   cmp(super_klass, temp_reg); // test the displayed supertype
 165 
 166   // This check has worked decisively for primary supers.
 167   // Secondary supers are sought in the super_cache ('super_cache_addr').
 168   // (Secondary supers are interfaces and very deeply nested subtypes.)
 169   // This works in the same check above because of a tricky aliasing
 170   // between the super_cache and the primary super display elements.
 171   // (The 'super_check_addr' can address either, as the case requires.)
 172   // Note that the cache is updated below if it does not help us find
 173   // what we need immediately.
 174   // So if it was a primary super, we can just fail immediately.
 175   // Otherwise, it's the slow path for us (no success at this point).
 176 
 177   b(*L_success, eq);
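       // The display probe missed. If super_check_offset equals the secondary_super_cache offset,
       // the probed slot was only the cache, so the miss is inconclusive (eq => slow path);
       // otherwise the probe hit a primary-super slot and the type check has definitively failed.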
 178   cmp_32(super_check_offset, sc_offset);
 179   if (L_failure == &L_fallthrough) {
 180     b(*L_slow_path, eq);
 181   } else {
 182     b(*L_failure, ne);
 183     if (L_slow_path != &L_fallthrough) {
 184       b(*L_slow_path);
 185     }
 186   }
 187 
 188   bind(L_fallthrough);
 189 }
 190 
 191 
 192 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
 193                                                    Register super_klass,
 194                                                    Register temp_reg,
 195                                                    Register temp2_reg,
 196                                                    Register temp3_reg,
 197                                                    Label* L_success,
 198                                                    Label* L_failure,
 199                                                    bool set_cond_codes) {
 200   // Note: if used by code that expects a register to be 0 on success,
 201   // this register must be temp_reg and set_cond_codes must be true
 202 
 203   Register saved_reg = noreg;
 204 
 205   // get additional tmp registers
 206   if (temp3_reg == noreg) {
 207     saved_reg = temp3_reg = LR;
 208     push(saved_reg);
 209   }
 210 
 211   assert(temp2_reg != noreg, "need all the temporary registers");
 212   assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
 213 
 214   Register cmp_temp = temp_reg;
 215   Register scan_temp = temp3_reg;
 216   Register count_temp = temp2_reg;
 217 
 218   Label L_fallthrough;
 219   int label_nulls = 0;
 220   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
 221   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
 222   assert(label_nulls <= 1, "at most one NULL in the batch");
 223 
 224   // a couple of useful fields in sub_klass:
 225   int ss_offset = in_bytes(Klass::secondary_supers_offset());
 226   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 227   Address secondary_supers_addr(sub_klass, ss_offset);
 228   Address super_cache_addr(     sub_klass, sc_offset);
 229 
 230 #ifndef PRODUCT
 231   inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
 232 #endif
 233 
 234   // We will consult the secondary-super array.
 235   ldr(scan_temp, Address(sub_klass, ss_offset));
 236 
 237   assert(! UseCompressedOops, "search_key must be the compressed super_klass");
 238   // Oops are never compressed on this port, so search_key is simply the uncompressed super_klass.
 239   Register search_key = super_klass;
 240 
 241   // Load the array length.
 242   ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
 243   add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
 244 
 245   add(count_temp, count_temp, 1);
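       // count_temp now holds length + 1, so the 'subs' at the loop head can decrement first
       // and exit via 'eq' once every element has been examined.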
 246 
 247   Label L_loop, L_fail;
 248 
 249   // Top of search loop
 250   bind(L_loop);
 251   // Notes:
 252   //  scan_temp starts at the array elements
 253   //  count_temp is 1+size
 254   subs(count_temp, count_temp, 1);
 255   if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
 256     // direct jump to L_failure if failed and no cleanup needed
 257     b(*L_failure, eq); // not found in the array
 258   } else {
 259     b(L_fail, eq); // not found in the array
 260   }
 261 
 262   // Load next super to check
 263   // In the array of super classes elements are pointer sized.
 264   int element_size = wordSize;
 265   ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
 266 
 267   // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
 268   subs(cmp_temp, cmp_temp, search_key);
 269 
 270   // A miss means we are NOT a subtype and need to keep looping
 271   b(L_loop, ne);
 272 
 273   // Falling out the bottom means we found a hit; we ARE a subtype
 274 
 275   // Note: temp_reg/cmp_temp is already 0 and flag Z is set
 276 
 277   // Success.  Cache the super we found and proceed in triumph.
 278   str(super_klass, Address(sub_klass, sc_offset));
 279 
 280   if (saved_reg != noreg) {
 281     // Return success
 282     pop(saved_reg);
 283   }
 284 
 285   b(*L_success);
 286 
 287   bind(L_fail);
 288   // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
 289   if (set_cond_codes) {
 290     movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
 291   }
 292   if (saved_reg != noreg) {
 293     pop(saved_reg);
 294   }
 295   if (L_failure != &L_fallthrough) {
 296     b(*L_failure);
 297   }
 298 
 299   bind(L_fallthrough);
 300 }
 301 
 302 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
 303 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
 304   assert_different_registers(params_base, params_count);
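       // tmp := params_base + params_count * stackElementSize; the receiver slot sits one
       // stack element below that address.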
 305   add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
 306   return Address(tmp, -Interpreter::stackElementSize);
 307 }
 308 
 309 
 310 void MacroAssembler::align(int modulus) {
 311   while (offset() % modulus != 0) {
 312     nop();
 313   }
 314 }
 315 
 316 int MacroAssembler::set_last_Java_frame(Register last_java_sp,
 317                                         Register last_java_fp,
 318                                         bool save_last_java_pc,
 319                                         Register tmp) {
 320   int pc_offset;
 321   if (last_java_fp != noreg) {
 322     // optional
 323     str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
 324     _fp_saved = true;
 325   } else {
 326     _fp_saved = false;
 327   }
 328   if (save_last_java_pc) {
 329     str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
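       // The str above stores PC plus an architectural read-ahead (8 or 12 bytes, core dependent);
       // stored_pc_adjustment() converts offset() into the offset of the value actually stored.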
 330     pc_offset = offset() + VM_Version::stored_pc_adjustment();
 331     _pc_saved = true;
 332   } else {
 333     _pc_saved = false;
 334     pc_offset = -1;
 335   }
 336   // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that other
 337   // entries are valid when SP is set.
 338 
 339   // However, this is probably not a strong constraint since, for instance, PC is
 340   // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
 341   // we now write the fields in the expected order but we have not added a StoreStore
 342   // barrier.
 343 
 344   // XXX: if the ordering is really important, PC should always be saved (without forgetting
 345   // to update oop_map offsets) and a StoreStore barrier might be needed.
 346 
 347   if (last_java_sp == noreg) {
 348     last_java_sp = SP; // always saved
 349   }
 350   str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
 351 
 352   return pc_offset; // for oopmaps
 353 }
 354 
 355 void MacroAssembler::reset_last_Java_frame(Register tmp) {
 356   const Register Rzero = zero_register(tmp);
 357   str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
 358   if (_fp_saved) {
 359     str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
 360   }
 361   if (_pc_saved) {
 362     str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
 363   }
 364 }
 365 
 366 
 367 // Implementation of call_VM versions
 368 
 369 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
 370   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 371   assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
 372 
 373   // Safer to save R9 here since callers may have been written
 374   // assuming R9 survives. This is suboptimal but is not worth
 375   // optimizing for the few platforms where R9 is scratched.
 376   push(RegisterSet(R4) | R9ifScratched);
 377   mov(R4, SP);
 378   bic(SP, SP, StackAlignmentInBytes - 1);
 379   call(entry_point, relocInfo::runtime_call_type);
 380   mov(SP, R4);
 381   pop(RegisterSet(R4) | R9ifScratched);
 382 }
 383 
 384 
 385 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
 386   assert(number_of_arguments >= 0, "cannot have negative number of arguments");
 387   assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
 388 
 389   const Register tmp = Rtemp;
 390   assert_different_registers(oop_result, tmp);
 391 
 392   set_last_Java_frame(SP, FP, true, tmp);
 393 
 394 #if R9_IS_SCRATCHED
 395   // Safer to save R9 here since callers may have been written
 396   // assuming R9 survives. This is suboptimal but is not worth
 397   // optimizing for the few platforms where R9 is scratched.
 398 
 399   // Note: cannot save R9 above the saved SP (some calls expect, for
 400   // instance, the Java stack top at the saved SP)
 401   // => once SP is saved (with set_last_Java_frame), decrease SP before rounding to
 402   // ensure the slot at SP will be free for R9.
 403   sub(SP, SP, 4);
 404   bic(SP, SP, StackAlignmentInBytes - 1);
 405   str(R9, Address(SP, 0));
 406 #else
 407   bic(SP, SP, StackAlignmentInBytes - 1);
 408 #endif // R9_IS_SCRATCHED
 409 
 410   mov(R0, Rthread);
 411   call(entry_point, relocInfo::runtime_call_type);
 412 
 413 #if R9_IS_SCRATCHED
 414   ldr(R9, Address(SP, 0));
 415 #endif
 416   ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
 417 
 418   reset_last_Java_frame(tmp);
 419 
 420   // C++ interp handles this in the interpreter
 421   check_and_handle_popframe();
 422   check_and_handle_earlyret();
 423 
 424   if (check_exceptions) {
 425     // check for pending exceptions
 426     ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
 427     cmp(tmp, 0);
 428     mov(Rexception_pc, PC, ne);
 429     b(StubRoutines::forward_exception_entry(), ne);
 430   }
 431 
 432   // get oop result if there is one and reset the value in the thread
 433   if (oop_result->is_valid()) {
 434     get_vm_result(oop_result, tmp);
 435   }
 436 }
 437 
 438 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
 439   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 440 }
 441 
 442 
 443 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
 444   assert (arg_1 == R1, "fixed register for arg_1");
 445   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 446 }
 447 
 448 
 449 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 450   assert (arg_1 == R1, "fixed register for arg_1");
 451   assert (arg_2 == R2, "fixed register for arg_2");
 452   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 453 }
 454 
 455 
 456 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 457   assert (arg_1 == R1, "fixed register for arg_1");
 458   assert (arg_2 == R2, "fixed register for arg_2");
 459   assert (arg_3 == R3, "fixed register for arg_3");
 460   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 461 }
 462 
 463 
 464 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
 465   // Not used on ARM
 466   Unimplemented();
 467 }
 468 
 469 
 470 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
 471   // Not used on ARM
 472   Unimplemented();
 473 }
 474 
 475 
 476 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
 477 // Not used on ARM
 478   Unimplemented();
 479 }
 480 
 481 
 482 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
 483   // Not used on ARM
 484   Unimplemented();
 485 }
 486 
 487 // Raw call, without saving/restoring registers, exception handling, etc.
 488 // Mainly used from various stubs.
 489 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
 490   const Register tmp = Rtemp; // Rtemp free since scratched by call
 491   set_last_Java_frame(SP, FP, true, tmp);
 492 #if R9_IS_SCRATCHED
 493   if (save_R9_if_scratched) {
 494     // Note: Saving also R10 for alignment.
 495     push(RegisterSet(R9, R10));
 496   }
 497 #endif
 498   mov(R0, Rthread);
 499   call(entry_point, relocInfo::runtime_call_type);
 500 #if R9_IS_SCRATCHED
 501   if (save_R9_if_scratched) {
 502     pop(RegisterSet(R9, R10));
 503   }
 504 #endif
 505   reset_last_Java_frame(tmp);
 506 }
 507 
 508 void MacroAssembler::call_VM_leaf(address entry_point) {
 509   call_VM_leaf_helper(entry_point, 0);
 510 }
 511 
 512 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
 513   assert (arg_1 == R0, "fixed register for arg_1");
 514   call_VM_leaf_helper(entry_point, 1);
 515 }
 516 
 517 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
 518   assert (arg_1 == R0, "fixed register for arg_1");
 519   assert (arg_2 == R1, "fixed register for arg_2");
 520   call_VM_leaf_helper(entry_point, 2);
 521 }
 522 
 523 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
 524   assert (arg_1 == R0, "fixed register for arg_1");
 525   assert (arg_2 == R1, "fixed register for arg_2");
 526   assert (arg_3 == R2, "fixed register for arg_3");
 527   call_VM_leaf_helper(entry_point, 3);
 528 }
 529 
 530 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
 531   assert (arg_1 == R0, "fixed register for arg_1");
 532   assert (arg_2 == R1, "fixed register for arg_2");
 533   assert (arg_3 == R2, "fixed register for arg_3");
 534   assert (arg_4 == R3, "fixed register for arg_4");
 535   call_VM_leaf_helper(entry_point, 4);
 536 }
 537 
 538 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
 539   assert_different_registers(oop_result, tmp);
 540   ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
 541   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
 542   verify_oop(oop_result);
 543 }
 544 
 545 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
 546   assert_different_registers(metadata_result, tmp);
 547   ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
 548   str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
 549 }
 550 
 551 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
 552   if (arg2.is_register()) {
 553     add(dst, arg1, arg2.as_register());
 554   } else {
 555     add(dst, arg1, arg2.as_constant());
 556   }
 557 }
 558 
 559 void MacroAssembler::add_slow(Register rd, Register rn, int c) {
 560   // This function is used in compiler for handling large frame offsets
 561   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 562     return sub(rd, rn, (-c));
 563   }
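       // Otherwise split c into a low part (c & 0x3fc, always encodable as a rotated immediate)
       // and a high part, which must itself be a rotated immediate (checked below).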
 564   int low = c & 0x3fc;
 565   if (low != 0) {
 566     add(rd, rn, low);
 567     rn = rd;
 568   }
 569   if (c & ~0x3fc) {
 570     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
 571     add(rd, rn, c & ~0x3fc);
 572   } else if (rd != rn) {
 573     assert(c == 0, "");
 574     mov(rd, rn); // need to generate at least one move!
 575   }
 576 }
 577 
 578 void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
 579   // This function is used in compiler for handling large frame offsets
 580   if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
 581     return add(rd, rn, (-c));
 582   }
 583   int low = c & 0x3fc;
 584   if (low != 0) {
 585     sub(rd, rn, low);
 586     rn = rd;
 587   }
 588   if (c & ~0x3fc) {
 589     assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
 590     sub(rd, rn, c & ~0x3fc);
 591   } else if (rd != rn) {
 592     assert(c == 0, "");
 593     mov(rd, rn); // need to generate at least one move!
 594   }
 595 }
 596 
 597 void MacroAssembler::mov_slow(Register rd, address addr) {
 598   // do *not* call the non-relocated mov_related_address
 599   mov_slow(rd, (intptr_t)addr);
 600 }
 601 
 602 void MacroAssembler::mov_slow(Register rd, const char *str) {
 603   mov_slow(rd, (intptr_t)str);
 604 }
 605 
 606 
 607 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
 608   if (AsmOperand::is_rotated_imm(c)) {
 609     mov(rd, c, cond);
 610   } else if (AsmOperand::is_rotated_imm(~c)) {
 611     mvn(rd, ~c, cond);
 612   } else if (VM_Version::supports_movw()) {
 613     movw(rd, c & 0xffff, cond);
 614     if ((unsigned int)c >> 16) {
 615       movt(rd, (unsigned int)c >> 16, cond);
 616     }
 617   } else {
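         // No movw/movt available: build the constant from up to four 8-bit chunks at even bit
         // positions, each of which is encodable as an ARM rotated immediate.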
 618     // Find first non-zero bit
 619     int shift = 0;
 620     while ((c & (3 << shift)) == 0) {
 621       shift += 2;
 622     }
 623     // Put the least significant part of the constant
 624     int mask = 0xff << shift;
 625     mov(rd, c & mask, cond);
 626     // Add up to 3 other parts of the constant;
 627     // each of them can be represented as rotated_imm
 628     if (c & (mask << 8)) {
 629       orr(rd, rd, c & (mask << 8), cond);
 630     }
 631     if (c & (mask << 16)) {
 632       orr(rd, rd, c & (mask << 16), cond);
 633     }
 634     if (c & (mask << 24)) {
 635       orr(rd, rd, c & (mask << 24), cond);
 636     }
 637   }
 638 }
 639 
 640 
 641 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
 642                              AsmCondition cond
 643                              ) {
 644 
 645   if (o == NULL) {
 646     mov(rd, 0, cond);
 647     return;
 648   }
 649 
 650   if (oop_index == 0) {
 651     oop_index = oop_recorder()->allocate_oop_index(o);
 652   }
 653   relocate(oop_Relocation::spec(oop_index));
 654 
 655   if (VM_Version::supports_movw()) {
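         // Emit movw/movt with zero placeholders; the relocation recorded above lets the real
         // oop address be patched in later (see NativeMovConstReg::set_data).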
 656     movw(rd, 0, cond);
 657     movt(rd, 0, cond);
 658   } else {
 659     ldr(rd, Address(PC), cond);
 660     // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
 661     nop();
 662   }
 663 }
 664 
 665 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index) {
 666   if (o == NULL) {
 667     mov(rd, 0);
 668     return;
 669   }
 670 
 671   if (metadata_index == 0) {
 672     metadata_index = oop_recorder()->allocate_metadata_index(o);
 673   }
 674   relocate(metadata_Relocation::spec(metadata_index));
 675 
 676   if (VM_Version::supports_movw()) {
 677     movw(rd, ((int)o) & 0xffff);
 678     movt(rd, (unsigned int)o >> 16);
 679   } else {
 680     ldr(rd, Address(PC));
 681     // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
 682     nop();
 683   }
 684 }
 685 
 686 void MacroAssembler::mov_float(FloatRegister fd, jfloat c, AsmCondition cond) {
 687   Label skip_constant;
 688   union {
 689     jfloat f;
 690     jint i;
 691   } accessor;
 692   accessor.f = c;
 693 
 694   flds(fd, Address(PC), cond);
 695   b(skip_constant);
 696   emit_int32(accessor.i);
 697   bind(skip_constant);
 698 }
 699 
 700 void MacroAssembler::mov_double(FloatRegister fd, jdouble c, AsmCondition cond) {
 701   Label skip_constant;
 702   union {
 703     jdouble d;
 704     jint i[2];
 705   } accessor;
 706   accessor.d = c;
 707 
 708   fldd(fd, Address(PC), cond);
 709   b(skip_constant);
 710   emit_int32(accessor.i[0]);
 711   emit_int32(accessor.i[1]);
 712   bind(skip_constant);
 713 }
 714 
 715 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
 716   intptr_t addr = (intptr_t) address_of_global;
 717   mov_slow(reg, addr & ~0xfff);
 718   ldr(reg, Address(reg, addr & 0xfff));
 719 }
 720 
 721 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
 722   ldr_global_s32(reg, address_of_global);
 723 }
 724 
 725 void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
 726   intptr_t addr = (intptr_t) address_of_global;
 727   mov_slow(reg, addr & ~0xfff);
 728   ldrb(reg, Address(reg, addr & 0xfff));
 729 }
 730 
 731 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
 732   if (bits <= 8) {
 733     andr(rd, rn, (1 << bits) - 1);
 734   } else if (bits >= 24) {
 735     bic(rd, rn, -1 << bits);
 736   } else {
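         // No single-instruction mask fits: shift the field up to the top of the word,
         // then shift back down with zero fill.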
 737     mov(rd, AsmOperand(rn, lsl, 32 - bits));
 738     mov(rd, AsmOperand(rd, lsr, 32 - bits));
 739   }
 740 }
 741 
 742 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
 743   mov(rd, AsmOperand(rn, lsl, 32 - bits));
 744   mov(rd, AsmOperand(rd, asr, 32 - bits));
 745 }
 746 
 747 
 748 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
 749                                Register rn_lo, Register rn_hi,
 750                                AsmCondition cond) {
 751   if (rd_lo != rn_hi) {
 752     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
 753     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
 754   } else if (rd_hi != rn_lo) {
 755     if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
 756     if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
 757   } else {
 758     eor(rd_lo, rd_hi, rd_lo, cond);
 759     eor(rd_hi, rd_lo, rd_hi, cond);
 760     eor(rd_lo, rd_hi, rd_lo, cond);
 761   }
 762 }
 763 
 764 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
 765                                 Register rn_lo, Register rn_hi,
 766                                 AsmShift shift, Register count) {
 767   Register tmp;
 768   if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
 769     tmp = rd_lo;
 770   } else {
 771     tmp = rd_hi;
 772   }
 773   assert_different_registers(tmp, count, rn_lo, rn_hi);
 774 
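       // tmp = count - 32: 'pl' below handles shifts by 32 or more bits,
       // 'mi' handles shifts by fewer than 32 (with rsb computing 32 - count).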
 775   subs(tmp, count, 32);
 776   if (shift == lsl) {
 777     assert_different_registers(rd_hi, rn_lo);
 778     assert_different_registers(count, rd_hi);
 779     mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
 780     rsb(tmp, count, 32, mi);
 781     if (rd_hi == rn_hi) {
 782       mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
 783       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
 784     } else {
 785       mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
 786       orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
 787     }
 788     mov(rd_lo, AsmOperand(rn_lo, shift, count));
 789   } else {
 790     assert_different_registers(rd_lo, rn_hi);
 791     assert_different_registers(rd_lo, count);
 792     mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
 793     rsb(tmp, count, 32, mi);
 794     if (rd_lo == rn_lo) {
 795       mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
 796       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
 797     } else {
 798       mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
 799       orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
 800     }
 801     mov(rd_hi, AsmOperand(rn_hi, shift, count));
 802   }
 803 }
 804 
 805 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
 806                                 Register rn_lo, Register rn_hi,
 807                                 AsmShift shift, int count) {
 808   assert(count != 0 && (count & ~63) == 0, "must be");
 809 
 810   if (shift == lsl) {
 811     assert_different_registers(rd_hi, rn_lo);
 812     if (count >= 32) {
 813       mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
 814       mov(rd_lo, 0);
 815     } else {
 816       mov(rd_hi, AsmOperand(rn_hi, lsl, count));
 817       orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
 818       mov(rd_lo, AsmOperand(rn_lo, lsl, count));
 819     }
 820   } else {
 821     assert_different_registers(rd_lo, rn_hi);
 822     if (count >= 32) {
 823       if (count == 32) {
 824         mov(rd_lo, rn_hi);
 825       } else {
 826         mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
 827       }
 828       if (shift == asr) {
 829         mov(rd_hi, AsmOperand(rn_hi, asr, 0));
 830       } else {
 831         mov(rd_hi, 0);
 832       }
 833     } else {
 834       mov(rd_lo, AsmOperand(rn_lo, lsr, count));
 835       orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
 836       mov(rd_hi, AsmOperand(rn_hi, shift, count));
 837     }
 838   }
 839 }
 840 
 841 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 842   // This code pattern is matched in NativeInstruction::skip_verify_oop.
 843   // Update it when modifying this code.
 844   if (!VerifyOops) return;
 845 
 846   char buffer[64];
 847 #ifdef COMPILER1
 848   if (CommentedAssembly) {
 849     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
 850     block_comment(buffer);
 851   }
 852 #endif
 853   const char* msg_buffer = NULL;
 854   {
 855     ResourceMark rm;
 856     stringStream ss;
 857     ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
 858     msg_buffer = code_string(ss.as_string());
 859   }
 860 
 861   save_all_registers();
 862 
 863   if (reg != R2) {
 864       mov(R2, reg);                              // oop to verify
 865   }
 866   mov(R1, SP);                                   // register save area
 867 
 868   Label done;
 869   InlinedString Lmsg(msg_buffer);
 870   ldr_literal(R0, Lmsg);                         // message
 871 
 872   // call indirectly to solve generation ordering problem
 873   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
 874   call(Rtemp);
 875 
 876   restore_all_registers();
 877 
 878   b(done);
 879 #ifdef COMPILER2
 880   int off = offset();
 881 #endif
 882   bind_literal(Lmsg);
 883 #ifdef COMPILER2
 884   if (offset() - off == 1 * wordSize) {
 885     // no padding, so insert nop for worst-case sizing
 886     nop();
 887   }
 888 #endif
 889   bind(done);
 890 }
 891 
 892 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 893   if (!VerifyOops) return;
 894 
 895   const char* msg_buffer = NULL;
 896   {
 897     ResourceMark rm;
 898     stringStream ss;
 899     if ((addr.base() == SP) && (addr.index()==noreg)) {
 900       ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
 901     } else {
 902       ss.print("verify_oop_addr: %s", s);
 903     }
 904     ss.print(" (%s:%d)", file, line);
 905     msg_buffer = code_string(ss.as_string());
 906   }
 907 
 908   int push_size = save_all_registers();
 909 
 910   if (addr.base() == SP) {
 911     // computes an addr that takes into account the push
 912     if (addr.index() != noreg) {
 913       Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
 914       add(new_base, SP, push_size);
 915       addr = addr.rebase(new_base);
 916     } else {
 917       addr = addr.plus_disp(push_size);
 918     }
 919   }
 920 
 921   ldr(R2, addr);                                 // oop to verify
 922   mov(R1, SP);                                   // register save area
 923 
 924   Label done;
 925   InlinedString Lmsg(msg_buffer);
 926   ldr_literal(R0, Lmsg);                         // message
 927 
 928   // call indirectly to solve generation ordering problem
 929   ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
 930   call(Rtemp);
 931 
 932   restore_all_registers();
 933 
 934   b(done);
 935   bind_literal(Lmsg);
 936   bind(done);
 937 }
 938 
 939 void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
 940   if (needs_explicit_null_check(offset)) {
 941     assert_different_registers(reg, tmp);
 942     if (tmp == noreg) {
 943       tmp = Rtemp;
 944       assert((! Thread::current()->is_Compiler_thread()) ||
 945              (! (ciEnv::current()->task() == NULL)) ||
 946              (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
 947              "Rtemp not available in C2"); // explicit tmp register required
 948       // XXX: could we mark the code buffer as not compatible with C2 ?
 949     }
 950     ldr(tmp, Address(reg));
 951   }
 952 }
 953 
 954 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 955 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
 956                                  RegisterOrConstant size_expression, Label& slow_case) {
 957   if (!Universe::heap()->supports_inline_contig_alloc()) {
 958     b(slow_case);
 959     return;
 960   }
 961 
 962   CollectedHeap* ch = Universe::heap();
 963 
 964   const Register top_addr = tmp1;
 965   const Register heap_end = tmp2;
 966 
 967   if (size_expression.is_register()) {
 968     assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
 969   } else {
 970     assert_different_registers(obj, obj_end, top_addr, heap_end);
 971   }
 972 
 973   bool load_const = VM_Version::supports_movw();
 974   if (load_const) {
 975     mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
 976   } else {
 977     ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
 978   }
 979   // Calculate new heap_top by adding the size of the object
 980   Label retry;
 981   bind(retry);
 982 
 983   ldr(obj, Address(top_addr));
 984 
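       // Load the heap end pointer via the fixed compile-time delta between end_addr() and
       // top_addr(), so only the single top_addr base needs to be materialized.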
 985   ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
 986   add_rc(obj_end, obj, size_expression);
 987   // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
 988   cmp(obj_end, obj);
 989   b(slow_case, lo);
 990   // Update heap_top if allocation succeeded
 991   cmp(obj_end, heap_end);
 992   b(slow_case, hi);
 993 
 994   atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
 995   b(retry, ne);
 996 }
 997 
 998 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 999 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1000                                  RegisterOrConstant size_expression, Label& slow_case) {
1001   const Register tlab_end = tmp1;
1002   assert_different_registers(obj, obj_end, tlab_end);
1003 
1004   ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1005   ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1006   add_rc(obj_end, obj, size_expression);
1007   cmp(obj_end, tlab_end);
1008   b(slow_case, hi);
1009   str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1010 }
1011 
1012 // Fills the memory region [start, end) with zeroes. Clobbers `start` and `tmp` registers.
1013 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1014   Label loop;
1015   const Register ptr = start;
1016 
1017   mov(tmp, 0);
1018   bind(loop);
1019   cmp(ptr, end);
1020   str(tmp, Address(ptr, wordSize, post_indexed), lo);
1021   b(loop, lo);
1022 }
1023 
1024 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1025   // Bump total bytes allocated by this thread
1026   Label done;
1027 
1028   // Borrow the Rthread for alloc counter
1029   Register Ralloc = Rthread;
1030   add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1031   ldr(tmp, Address(Ralloc));
1032   adds(tmp, tmp, size_in_bytes);
1033   str(tmp, Address(Ralloc), cc);
1034   b(done, cc);
1035 
1036   // Increment the high word and store it single-copy atomically (an unlikely scenario on typical embedded systems, as it means more than 4GB has been allocated).
1037   // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers. Such a request could be difficult to satisfy by
1038   // allocating those registers at a higher level, therefore the routine is ready to allocate a pair itself.
1039   Register low, high;
1040   // Select either R0/R1 or R2/R3
1041 
1042   if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1043     low = R2;
1044     high  = R3;
1045   } else {
1046     low = R0;
1047     high  = R1;
1048   }
1049   push(RegisterSet(low, high));
1050 
1051   ldrd(low, Address(Ralloc));
1052   adds(low, low, size_in_bytes);
1053   adc(high, high, 0);
1054   strd(low, Address(Ralloc));
1055 
1056   pop(RegisterSet(low, high));
1057 
1058   bind(done);
1059 
1060   // Unborrow the Rthread
1061   sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
1062 }
1063 
1064 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1065   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1066   if (UseStackBanging) {
1067     const int page_size = os::vm_page_size();
1068 
1069     sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1070     strb(R0, Address(tmp));
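         // Bang one byte every 0xff0 bytes below the shadow zone; the stride stays under the
         // usual 4K page size so no guard page is skipped, and 0xff0 encodes as an immediate offset.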
1071     for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1072       strb(R0, Address(tmp, -0xff0, pre_indexed));
1073     }
1074   }
1075 }
1076 
1077 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1078   if (UseStackBanging) {
1079     Label loop;
1080 
1081     mov(tmp, SP);
1082     add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1083     bind(loop);
1084     subs(Rsize, Rsize, 0xff0);
1085     strb(R0, Address(tmp, -0xff0, pre_indexed));
1086     b(loop, hi);
1087   }
1088 }
1089 
1090 void MacroAssembler::stop(const char* msg) {
1091   // This code pattern is matched in NativeInstruction::is_stop.
1092   // Update it when modifying this code.
1093 #ifdef COMPILER1
1094   if (CommentedAssembly) {
1095     block_comment("stop");
1096   }
1097 #endif
1098 
1099   InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1100   InlinedString Lmsg(msg);
1101 
1102   // save all registers for further inspection
1103   save_all_registers();
1104 
1105   ldr_literal(R0, Lmsg);                     // message
1106   mov(R1, SP);                               // register save area
1107 
1108   ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1109 
1110   bind_literal(Lmsg);
1111   bind_literal(Ldebug);
1112 }
1113 
1114 void MacroAssembler::warn(const char* msg) {
1115 #ifdef COMPILER1
1116   if (CommentedAssembly) {
1117     block_comment("warn");
1118   }
1119 #endif
1120 
1121   InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1122   InlinedString Lmsg(msg);
1123   Label done;
1124 
1125   int push_size = save_caller_save_registers();
1126 
1127 
1128   ldr_literal(R0, Lmsg);                    // message
1129   ldr_literal(LR, Lwarn);                   // call warning
1130 
1131   call(LR);
1132 
1133   restore_caller_save_registers();
1134 
1135   b(done);
1136   bind_literal(Lmsg);
1137   bind_literal(Lwarn);
1138   bind(done);
1139 }
1140 
1141 
1142 int MacroAssembler::save_all_registers() {
1143   // This code pattern is matched in NativeInstruction::is_save_all_registers.
1144   // Update it when modifying this code.
1145   push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1146   return 15*wordSize;
1147 }
1148 
1149 void MacroAssembler::restore_all_registers() {
1150   pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1151   add(SP, SP, wordSize);                         // discard saved PC
1152 }
1153 
1154 int MacroAssembler::save_caller_save_registers() {
1155 #if R9_IS_SCRATCHED
1156   // Save also R10 to preserve alignment
1157   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1158   return 8*wordSize;
1159 #else
1160   push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1161   return 6*wordSize;
1162 #endif
1163 }
1164 
1165 void MacroAssembler::restore_caller_save_registers() {
1166 #if R9_IS_SCRATCHED
1167   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1168 #else
1169   pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1170 #endif
1171 }
1172 
1173 void MacroAssembler::debug(const char* msg, const intx* registers) {
1174   // In order to get locks to work, we need to fake a in_VM state
1175   JavaThread* thread = JavaThread::current();
1176   thread->set_thread_state(_thread_in_vm);
1177 
1178   if (ShowMessageBoxOnError) {
1179     ttyLocker ttyl;
1180     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1181       BytecodeCounter::print();
1182     }
1183     if (os::message_box(msg, "Execution stopped, print registers?")) {
1184       // saved registers: R0-R12, LR, PC
1185       const int nregs = 15;
1186       const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1187 
1188       for (int i = 0; i < nregs; i++) {
1189         tty->print_cr("%s = " INTPTR_FORMAT, regs[i]->name(), registers[i]);
1190       }
1191 
1192       // derive original SP value from the address of register save area
1193       tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1194     }
1195     BREAKPOINT;
1196   } else {
1197     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1198   }
1199   assert(false, "DEBUG MESSAGE: %s", msg);
1200   fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1201 }
1202 
1203 void MacroAssembler::unimplemented(const char* what) {
1204   const char* buf = NULL;
1205   {
1206     ResourceMark rm;
1207     stringStream ss;
1208     ss.print("unimplemented: %s", what);
1209     buf = code_string(ss.as_string());
1210   }
1211   stop(buf);
1212 }
1213 
1214 
1215 // Implementation of FixedSizeCodeBlock
1216 
1217 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1218 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1219 }
1220 
1221 FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1222   if (_enabled) {
1223     address curr_pc = _masm->pc();
1224 
1225     assert(_start < curr_pc, "invalid current pc");
1226     guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1227 
1228     int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1229     for (int i = 0; i < nops_count; i++) {
1230       _masm->nop();
1231     }
1232   }
1233 }
1234 
1235 
1236 // Serializes memory. Potentially clobbers the flags and the scratch register.
1237 // tmp is a scratch register for the ARMv6 co-processor (CP15) barrier write (can be noreg on other architecture versions).
1238 // preserve_flags takes a longer path in the LoadStore case (a dmb rather than a control dependency) to preserve the status flags. Optional.
1239 // load_tgt is an ordered load target, used in the LoadStore case only to create a dependency between the load operation and the conditional branch. Optional.
1240 void MacroAssembler::membar(Membar_mask_bits order_constraint,
1241                             Register tmp,
1242                             bool preserve_flags,
1243                             Register load_tgt) {
1244   if (!os::is_MP()) return;
1245 
1246   if (order_constraint == StoreStore) {
1247     dmb(DMB_st, tmp);
1248   } else if ((order_constraint & StoreLoad)  ||
1249              (order_constraint & LoadLoad)   ||
1250              (order_constraint & StoreStore) ||
1251              (load_tgt == noreg)             ||
1252              preserve_flags) {
1253     dmb(DMB_all, tmp);
1254   } else {
1255     // LoadStore: reordering of speculative stores is prohibited
1256 
1257     // By providing an ordered load target register, we avoid an extra memory load reference
1258     Label not_taken;
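         // cmp of a register against itself always yields 'eq', so the branch below is never taken;
         // it exists only to create a control dependency on load_tgt, which ARM orders before
         // subsequent stores (giving the required LoadStore ordering without a dmb).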
1259     bind(not_taken);
1260     cmp(load_tgt, load_tgt);
1261     b(not_taken, ne);
1262   }
1263 }
1264 
1265 
1266 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1267 // on failure, so fall-through can only mean success.
1268 // "one_shot" controls whether we loop and retry to mitigate spurious failures.
1269 // This is only needed for C2, which for some reason does not retry,
1270 // while C1/interpreter does.
1271 // TODO: measure if it makes a difference
1272 
1273 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1274   Register base, Register tmp, Label &slow_case,
1275   bool allow_fallthrough_on_failure, bool one_shot)
1276 {
1277 
1278   bool fallthrough_is_success = false;
1279 
1280   // ARM Litmus Test example does prefetching here.
1281   // TODO: investigate if it helps performance
1282 
1283   // The last store was to the displaced header, so to prevent
1284   // reordering we must issue a StoreStore or Release barrier before
1285   // the CAS store.
1286 
1287   membar(MacroAssembler::StoreStore, noreg);
1288 
1289   if (one_shot) {
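         // One-shot CAS: a single ldrex/strex with no retry on spurious failure. strex writes 0 to
         // tmp on success and 1 on failure, so the final conditional cmp leaves 'eq' set only if
         // both the compare and the store-exclusive succeeded.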
1290     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1291     cmp(tmp, oldval);
1292     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1293     cmp(tmp, 0, eq);
1294   } else {
1295     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1296   }
1297 
1298   // MemBarAcquireLock barrier
1299   // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1300   // but that doesn't prevent a load or store from floating up between
1301   // the load and store in the CAS sequence, so play it safe and
1302   // do a full fence.
1303   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1304   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1305     b(slow_case, ne);
1306   }
1307 }
1308 
1309 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1310   Register base, Register tmp, Label &slow_case,
1311   bool allow_fallthrough_on_failure, bool one_shot)
1312 {
1313 
1314   bool fallthrough_is_success = false;
1315 
1316   assert_different_registers(oldval,newval,base,tmp);
1317 
1318   // MemBarReleaseLock barrier
1319   // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1320   // but that doesn't prevent a load or store from floating down between
1321   // the load and store in the CAS sequence, so play it safe and
1322   // do a full fence.
1323   membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1324 
1325   if (one_shot) {
1326     ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1327     cmp(tmp, oldval);
1328     strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1329     cmp(tmp, 0, eq);
1330   } else {
1331     atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1332   }
1333   if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1334     b(slow_case, ne);
1335   }
1336 
1337   // ExitEnter
1338   // According to JSR-133 Cookbook, this should be StoreLoad, the same
1339   // barrier that follows volatile store.
1340   // TODO: Should be able to remove on armv8 if volatile loads
1341   // use the load-acquire instruction.
1342   membar(StoreLoad, noreg);
1343 }
1344 
1345 #ifndef PRODUCT
1346 
1347 // Preserves flags and all registers.
1348 // On SMP the updated value might not be visible to external observers without a synchronization barrier
1349 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1350   if (counter_addr != NULL) {
1351     InlinedAddress counter_addr_literal((address)counter_addr);
1352     Label done, retry;
1353     if (cond != al) {
1354       b(done, inverse(cond));
1355     }
1356 
1357     push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1358     ldr_literal(R0, counter_addr_literal);
1359 
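         // Preserve the caller's condition flags in Rtemp; the CAS loop below clobbers them
         // and msr restores them afterwards.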
1360     mrs(CPSR, Rtemp);
1361 
1362     bind(retry);
1363     ldr_s32(R1, Address(R0));
1364     add(R2, R1, 1);
1365     atomic_cas_bool(R1, R2, R0, 0, R3);
1366     b(retry, ne);
1367 
1368     msr(CPSR_fsxc, Rtemp);
1369 
1370     pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1371 
1372     b(done);
1373     bind_literal(counter_addr_literal);
1374 
1375     bind(done);
1376   }
1377 }
1378 
1379 #endif // !PRODUCT
1380 
1381 
1382 // Building block for CAS cases of biased locking: makes CAS and records statistics.
1383 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1384 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1385                                                  Register tmp, Label& slow_case, int* counter_addr) {
1386 
1387   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1388 #ifdef ASSERT
1389   breakpoint(ne); // Fallthrough only on success
1390 #endif
1391 #ifndef PRODUCT
1392   if (counter_addr != NULL) {
1393     cond_atomic_inc32(al, counter_addr);
1394   }
1395 #endif // !PRODUCT
1396 }
1397 
1398 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1399                                          bool swap_reg_contains_mark,
1400                                          Register tmp2,
1401                                          Label& done, Label& slow_case,
1402                                          BiasedLockingCounters* counters) {
1403   // obj_reg must be preserved (at least) if biased locking fails
1404   // tmp_reg is a temporary register
1405   // swap_reg was used as a temporary but contained a value
1406   //   that was used afterwards in some call paths. Callers
1407   //   have been fixed so that swap_reg no longer needs to be
1408   //   saved.
1409   // Rtemp is no longer scratched
1410 
1411   assert(UseBiasedLocking, "why call this otherwise?");
1412   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1413   guarantee(swap_reg!=tmp_reg, "invariant");
1414   assert(tmp_reg != noreg, "must supply tmp_reg");
1415 
1416 #ifndef PRODUCT
1417   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1418     counters = BiasedLocking::counters();
1419   }
1420 #endif
1421 
1422   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1423   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1424 
1425   // Biased locking
1426   // See whether the lock is currently biased toward our thread and
1427   // whether the epoch is still valid
1428   // Note that the runtime guarantees sufficient alignment of JavaThread
1429   // pointers to allow age to be placed into low bits
1430   // First check to see whether biasing is even enabled for this object
1431   Label cas_label;
1432 
1433   // The null check applies to the mark loading, if we need to load it.
1434   // If the mark has already been loaded in swap_reg then it has already
1435   // been performed and the offset is irrelevant.
1436   int null_check_offset = offset();
1437   if (!swap_reg_contains_mark) {
1438     ldr(swap_reg, mark_addr);
1439   }
1440 
1441   // On MP platforms loads could return 'stale' values in some cases.
1442   // That is acceptable since either CAS or slow case path is taken in the worst case.
1443 
1444   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1445   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1446 
1447   b(cas_label, ne);
1448 
1449   // The bias pattern is present in the object's header. Need to check
1450   // whether the bias owner and the epoch are both still current.
1451   load_klass(tmp_reg, obj_reg);
1452   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1453   orr(tmp_reg, tmp_reg, Rthread);
1454   eor(tmp_reg, tmp_reg, swap_reg);
1455 
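       // Clear the age bits and set the flags: a zero result ('eq') means the owner, epoch and
       // pattern bits all match, i.e. the object is already biased toward this thread.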
1456   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1457 
1458 #ifndef PRODUCT
1459   if (counters != NULL) {
1460     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1461   }
1462 #endif // !PRODUCT
1463 
1464   b(done, eq);
1465 
1466   Label try_revoke_bias;
1467   Label try_rebias;
1468 
1469   // At this point we know that the header has the bias pattern and
1470   // that we are not the bias owner in the current epoch. We need to
1471   // figure out more details about the state of the header in order to
1472   // know what operations can be legally performed on the object's
1473   // header.
1474 
1475   // If the low three bits in the xor result aren't clear, that means
1476   // the prototype header is no longer biased and we have to revoke
1477   // the bias on this object.
1478   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1479   b(try_revoke_bias, ne);
1480 
1481   // Biasing is still enabled for this data type. See whether the
1482   // epoch of the current bias is still valid, meaning that the epoch
1483   // bits of the mark word are equal to the epoch bits of the
1484   // prototype header. (Note that the prototype header's epoch bits
1485   // only change at a safepoint.) If not, attempt to rebias the object
1486   // toward the current thread. Note that we must be absolutely sure
1487   // that the current epoch is invalid in order to do this because
1488   // otherwise the manipulations it performs on the mark word are
1489   // illegal.
1490   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
1491   b(try_rebias, ne);
1492 
1493   // tmp_reg has the age, epoch and pattern bits cleared
1494   // The remaining (owner) bits are (Thread ^ current_owner)
1495 
1496   // The epoch of the current bias is still valid but we know nothing
1497   // about the owner; it might be set or it might be clear. Try to
1498   // acquire the bias of the object using an atomic operation. If this
1499   // fails we will go into the runtime to revoke the object's bias.
1500   // Note that we first construct the presumed unbiased header so we
1501   // don't accidentally blow away another thread's valid bias.
1502 
1503   // Note that we know the owner is not our own thread. Hence, success can
1504   // only happen when the owner bits are 0
1505 
1506   // until the assembler can be made smarter, we need to make some assumptions about the values
1507   // so we can optimize this:
1508   assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
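       // The lsl 23 / lsr 23 pair below clears the upper 23 (thread owner) bits
       // of the 32-bit mark word, keeping only the low 9 bits covered by the
       // assert above (lock pattern, age and epoch).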
1509 
1510   mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
1511   mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
1512 
1513   orr(tmp_reg, swap_reg, Rthread); // new mark
1514 
1515   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
1516         (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
1517 
1518   // If the biasing toward our thread failed, this means that
1519   // another thread succeeded in biasing it toward itself and we
1520   // need to revoke that bias. The revocation will occur in the
1521   // interpreter runtime in the slow case.
1522 
1523   b(done);
1524 
1525   bind(try_rebias);
1526 
1527   // At this point we know the epoch has expired, meaning that the
1528   // current "bias owner", if any, is actually invalid. Under these
1529   // circumstances _only_, we are allowed to use the current header's
1530   // value as the comparison value when doing the cas to acquire the
1531   // bias in the current epoch. In other words, we allow transfer of
1532   // the bias from one thread to another directly in this situation.
1533 
1534   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
1535 
1536   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
1537 
1538   // owner bits 'random'. Set them to Rthread.
1539   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
1540   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
1541 
1542   orr(tmp_reg, tmp_reg, Rthread); // new mark
1543 
1544   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
1545         (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
1546 
1547   // If the biasing toward our thread failed, then another thread
1548   // succeeded in biasing it toward itself and we need to revoke that
1549   // bias. The revocation will occur in the runtime in the slow case.
1550 
1551   b(done);
1552 
1553   bind(try_revoke_bias);
1554 
1555   // The prototype mark in the klass doesn't have the bias bit set any
1556   // more, indicating that objects of this data type are not supposed
1557   // to be biased any more. We are going to try to reset the mark of
1558   // this object to the prototype value and fall through to the
1559   // CAS-based locking scheme. Note that if our CAS fails, it means
1560   // that another thread raced us for the privilege of revoking the
1561   // bias of this particular object, so it's okay to continue in the
1562   // normal locking code.
1563 
1564   // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
1565 
1566   eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
1567 
1568   // owner bits 'random'. Clear them
1569   mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
1570   mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
1571 
1572   biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
1573         (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
1574 
1575   // Fall through to the normal CAS-based lock, because no matter what
1576   // the result of the above CAS, some thread must have succeeded in
1577   // removing the bias bit from the object's header.
1578 
1579   bind(cas_label);
1580 
1581   return null_check_offset;
1582 }
1583 
1584 
1585 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
1586   assert(UseBiasedLocking, "why call this otherwise?");
1587 
1588   // Check for biased locking unlock case, which is a no-op
1589   // Note: we do not have to check the thread ID for two reasons.
1590   // First, the interpreter checks for IllegalMonitorStateException at
1591   // a higher level. Second, if the bias was revoked while we held the
1592   // lock, the object could not be rebiased toward another thread, so
1593   // the bias bit would be clear.
1594   ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1595 
1596   andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1597   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1598   b(done, eq);
1599 }
1600 
1601 
1602 void MacroAssembler::resolve_jobject(Register value,
1603                                      Register tmp1,
1604                                      Register tmp2) {
1605   assert_different_registers(value, tmp1, tmp2);
1606   Label done, not_weak;
1607   cbz(value, done);             // Use NULL as-is.
1608   STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
1609   tbz(value, 0, not_weak);      // Test for jweak tag.
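       // Handles are distinguished by the low tag bit (weak_tag_mask == 1, as
       // asserted above): bit 0 set means a jweak, clear means a regular jobject.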
1610 
1611   // Resolve jweak.
1612   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
1613                  Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg);
1614   b(done);
1615   bind(not_weak);
1616   // Resolve (untagged) jobject.
1617   access_load_at(T_OBJECT, IN_NATIVE,
1618                  Address(value, 0), value, tmp1, tmp2, noreg);
1619   verify_oop(value);
1620   bind(done);
1621 }
1622 
1623 
1624 //////////////////////////////////////////////////////////////////////////////////
1625 
1626 
1627 void MacroAssembler::load_sized_value(Register dst, Address src,
1628                                       size_t size_in_bytes, bool is_signed, AsmCondition cond) {
1629   switch (size_in_bytes) {
1630     case  4: ldr(dst, src, cond); break;
1631     case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
1632     case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
1633     default: ShouldNotReachHere();
1634   }
1635 }
1636 
1637 
1638 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
1639   switch (size_in_bytes) {
1640     case  4: str(src, dst, cond); break;
1641     case  2: strh(src, dst, cond);   break;
1642     case  1: strb(src, dst, cond);   break;
1643     default: ShouldNotReachHere();
1644   }
1645 }
1646 
1647 // Look up the method for a megamorphic invokeinterface call.
1648 // The target method is determined by <Rinterf, Rindex>.
1649 // The receiver klass is in Rklass.
1650 // On success, the result will be in method_result, and execution falls through.
1651 // On failure, execution transfers to the given label.
1652 void MacroAssembler::lookup_interface_method(Register Rklass,
1653                                              Register Rintf,
1654                                              RegisterOrConstant itable_index,
1655                                              Register method_result,
1656                                              Register Rscan,
1657                                              Register Rtmp,
1658                                              Label& L_no_such_interface) {
1659 
1660   assert_different_registers(Rklass, Rintf, Rscan, Rtmp);
1661 
1662   const int entry_size = itableOffsetEntry::size() * HeapWordSize;
1663   assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");
1664 
1665   // Compute start of first itableOffsetEntry (which is at the end of the vtable)
1666   const int base = in_bytes(Klass::vtable_start_offset());
1667   const int scale = exact_log2(vtableEntry::size_in_bytes());
1668   ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
1669   add(Rscan, Rklass, base);
1670   add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));
1671 
1672   // Search through the itable for an interface equal to incoming Rintf
1673   // itable looks like [intface][offset][intface][offset][intface][offset]
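       // The scan below relies on the offset table being terminated by an entry
       // whose interface field is NULL: the loop exits either on a match (Z and C
       // set by the cmp) or on the terminating entry (Z set, C cleared by the cmn),
       // and the carry flag afterwards distinguishes the two cases.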
1674 
1675   Label loop;
1676   bind(loop);
1677   ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
1678   cmp(Rtmp, Rintf);  // sets Z and C if the interface is found
1679   cmn(Rtmp, 0, ne);  // otherwise sets Z if tmp == 0 (end of itable) and clears C
1680   b(loop, ne);
1681 
1682   // C == 0 means we reached the end of the itable without finding the interface (Rintf)
1683   b(L_no_such_interface, cc);
1684 
1685   if (method_result != noreg) {
1686     // Interface found at previous position of Rscan, now load the method
1687     ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
1688     if (itable_index.is_register()) {
1689       add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
1690       assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
1691       assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
1692       ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
1693     } else {
1694       int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
1695                           itableMethodEntry::method_offset_in_bytes();
1696       add_slow(method_result, Rklass, method_offset);
1697       ldr(method_result, Address(method_result, Rtmp));
1698     }
1699   }
1700 }
1701 
1702 #ifdef COMPILER2
1703 // TODO: 8 bytes at a time? pre-fetch?
1704 // Compare char[] arrays aligned to 4 bytes.
1705 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
1706                                         Register limit, Register result,
1707                                         Register chr1, Register chr2, Label& Ldone) {
1708   Label Lvector, Lloop;
1709 
1710   // Note: limit contains number of bytes (2*char_elements) != 0.
1711   tst(limit, 0x2); // trailing character ?
1712   b(Lvector, eq);
1713 
1714   // compare the trailing char
1715   sub(limit, limit, sizeof(jchar));
1716   ldrh(chr1, Address(ary1, limit));
1717   ldrh(chr2, Address(ary2, limit));
1718   cmp(chr1, chr2);
1719   mov(result, 0, ne);     // not equal
1720   b(Ldone, ne);
1721 
1722   // only one char ?
1723   tst(limit, limit);
1724   mov(result, 1, eq);
1725   b(Ldone, eq);
1726 
1727   // word by word compare, don't need alignment check
1728   bind(Lvector);
1729 
1730   // Shift ary1 and ary2 to the end of the arrays, negate limit
1731   add(ary1, limit, ary1);
1732   add(ary2, limit, ary2);
1733   neg(limit, limit);
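       // ary1/ary2 now point just past the data and limit is negative, so
       // Address(aryX, limit) starts at the first word and walks forward as limit
       // counts up; the adds below sets Z exactly when the last word has been compared.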
1734 
1735   bind(Lloop);
1736   ldr_u32(chr1, Address(ary1, limit));
1737   ldr_u32(chr2, Address(ary2, limit));
1738   cmp_32(chr1, chr2);
1739   mov(result, 0, ne);     // not equal
1740   b(Ldone, ne);
1741   adds(limit, limit, 2*sizeof(jchar));
1742   b(Lloop, ne);
1743 
1744   // Caller should set it:
1745   // mov(result_reg, 1);  //equal
1746 }
1747 #endif
1748 
1749 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
1750   mov_slow(tmpreg1, counter_addr);
1751   ldr_s32(tmpreg2, tmpreg1);
1752   add_32(tmpreg2, tmpreg2, 1);
1753   str_32(tmpreg2, tmpreg1);
1754 }
1755 
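     // Materializes the result of a preceding floating-point compare (whose flags
     // still live in FPSCR) as an integer in dst: 0 if equal, 1 if greater,
     // -1 if less or unordered. The bit trick places (N ^ V) and (!Z) in the top
     // two bits and sign-extends them with the arithmetic shift.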
1756 void MacroAssembler::floating_cmp(Register dst) {
1757   vmrs(dst, FPSCR);
1758   orr(dst, dst, 0x08000000);
1759   eor(dst, dst, AsmOperand(dst, lsl, 3));
1760   mov(dst, AsmOperand(dst, asr, 30));
1761 }
1762 
1763 void MacroAssembler::restore_default_fp_mode() {
1764 #ifndef __SOFTFP__
1765   // Round to Near mode, IEEE compatible, masked exceptions
1766   mov(Rtemp, 0);
1767   vmsr(FPSCR, Rtemp);
1768 #endif // !__SOFTFP__
1769 }
1770 
1771 // 24-bit word range == 26-bit byte range
1772 bool check26(int offset) {
1773   // this could be simplified, but it mimics encoding and decoding
1774   // an actual branch instruction
1775   int off1 = offset << 6 >> 8;
1776   int encoded = off1 & ((1<<24)-1);
1777   int decoded = encoded << 8 >> 6;
1778   return offset == decoded;
1779 }
1780 
1781 // Perform some slight adjustments so the default 32MB code cache
1782 // is fully reachable.
1783 static inline address first_cache_address() {
1784   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
1785 }
1786 static inline address last_cache_address() {
1787   return CodeCache::high_bound() - Assembler::InstructionSize;
1788 }
1789 
1790 
1791 // Can we reach target using unconditional branch or call from anywhere
1792 // in the code cache (because code can be relocated)?
1793 bool MacroAssembler::_reachable_from_cache(address target) {
1794 #ifdef __thumb__
1795   if ((1 & (intptr_t)target) != 0) {
1796     // Return false to avoid 'b' if we would need to switch to THUMB mode.
1797     return false;
1798   }
1799 #endif
1800 
1801   address cl = first_cache_address();
1802   address ch = last_cache_address();
1803 
1804   if (ForceUnreachable) {
1805     // Only addresses from CodeCache can be treated as reachable.
1806     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
1807       return false;
1808     }
1809   }
1810 
1811   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
1812   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
1813 
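       // The -8 accounts for the ARM convention that branch offsets are relative
       // to PC, which reads as the address of the branch instruction plus 8.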
1814   return check26(loffset - 8) && check26(hoffset - 8);
1815 }
1816 
1817 bool MacroAssembler::reachable_from_cache(address target) {
1818   assert(CodeCache::contains(pc()), "not supported");
1819   return _reachable_from_cache(target);
1820 }
1821 
1822 // Can we reach the entire code cache from anywhere else in the code cache?
1823 bool MacroAssembler::_cache_fully_reachable() {
1824   address cl = first_cache_address();
1825   address ch = last_cache_address();
1826   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
1827 }
1828 
1829 bool MacroAssembler::cache_fully_reachable() {
1830   assert(CodeCache::contains(pc()), "not supported");
1831   return _cache_fully_reachable();
1832 }
1833 
1834 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
1835   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
1836   if (reachable_from_cache(target)) {
1837     relocate(rtype);
1838     b(target, cond);
1839     return;
1840   }
1841 
1842   // Note: relocate is not needed for the code below,
1843   // which encodes targets in absolute format.
1844   if (ignore_non_patchable_relocations()) {
1845     rtype = relocInfo::none;
1846   }
1847 
1848   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
1849     // Note: this version cannot be (atomically) patched
1850     mov_slow(scratch, (intptr_t)target, cond);
1851     bx(scratch, cond);
1852   } else {
1853     Label skip;
1854     InlinedAddress address_literal(target);
1855     if (cond != al) {
1856       b(skip, inverse(cond));
1857     }
1858     relocate(rtype);
1859     ldr_literal(PC, address_literal);
1860     bind_literal(address_literal);
1861     bind(skip);
1862   }
1863 }
1864 
1865 // Similar to jump except that:
1866 // - near calls are valid only if any destination in the cache is near
1867 // - no movt/movw (not atomically patchable)
1868 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
1869   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
1870   if (cache_fully_reachable()) {
1871     // Note: this assumes that all possible targets (the initial one
1872     // and the addresses patched to) are all in the code cache.
1873     assert(CodeCache::contains(target), "target might be too far");
1874     relocate(rtype);
1875     b(target, cond);
1876     return;
1877   }
1878 
1879   // Discard the relocation information if not needed for CacheCompiledCode
1880   // since the next encodings are all in absolute format.
1881   if (ignore_non_patchable_relocations()) {
1882     rtype = relocInfo::none;
1883   }
1884 
1885   {
1886     Label skip;
1887     InlinedAddress address_literal(target);
1888     if (cond != al) {
1889       b(skip, inverse(cond));
1890     }
1891     relocate(rtype);
1892     ldr_literal(PC, address_literal);
1893     bind_literal(address_literal);
1894     bind(skip);
1895   }
1896 }
1897 
1898 void MacroAssembler::call(address target, RelocationHolder rspec, AsmCondition cond) {
1899   Register scratch = LR;
1900   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
1901   if (reachable_from_cache(target)) {
1902     relocate(rspec);
1903     bl(target, cond);
1904     return;
1905   }
1906 
1907   // Note: relocate is not needed for the code below,
1908   // which encodes targets in absolute format.
1909   if (ignore_non_patchable_relocations()) {
1910     // This assumes the information was needed only for relocating the code.
1911     rspec = RelocationHolder::none;
1912   }
1913 
1914   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
1915     // Note: this version cannot be (atomically) patched
1916     mov_slow(scratch, (intptr_t)target, cond);
1917     blx(scratch, cond);
1918     return;
1919   }
1920 
1921   {
1922     Label ret_addr;
1923     if (cond != al) {
1924       b(ret_addr, inverse(cond));
1925     }
1926 
1927 
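         // No bl here: the return address is set up manually in LR (it points just
         // past the inlined literal, at ret_addr) before loading the target into PC.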
1928     InlinedAddress address_literal(target);
1929     relocate(rspec);
1930     adr(LR, ret_addr);
1931     ldr_literal(PC, address_literal);
1932 
1933     bind_literal(address_literal);
1934     bind(ret_addr);
1935   }
1936 }
1937 
1938 
1939 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
1940   assert(rspec.type() == relocInfo::static_call_type ||
1941          rspec.type() == relocInfo::none ||
1942          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
1943 
1944   // Always generate the relocation information, needed for patching
1945   relocate(rspec); // used by NativeCall::is_call_before()
1946   if (cache_fully_reachable()) {
1947     // Note: this assumes that all possible targets (the initial one
1948     // and the addresses patched to) are all in the code cache.
1949     assert(CodeCache::contains(target), "target might be too far");
1950     bl(target);
1951   } else {
1952     Label ret_addr;
1953     InlinedAddress address_literal(target);
1954     adr(LR, ret_addr);
1955     ldr_literal(PC, address_literal);
1956     bind_literal(address_literal);
1957     bind(ret_addr);
1958   }
1959   return offset();
1960 }
1961 
1962 // ((OopHandle)result).resolve();
1963 void MacroAssembler::resolve_oop_handle(Register result) {
1964   // OopHandle::resolve is an indirection.
1965   ldr(result, Address(result, 0));
1966 }
1967 
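     // Loads the java.lang.Class mirror for the method's holder by walking
     // Method* -> ConstMethod* -> ConstantPool* -> pool holder Klass*, then
     // resolving the klass' java_mirror OopHandle.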
1968 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
1969   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1970   ldr(tmp, Address(method, Method::const_offset()));
1971   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
1972   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
1973   ldr(mirror, Address(tmp, mirror_offset));
1974   resolve_oop_handle(mirror);
1975 }
1976 
1977 
1978 ///////////////////////////////////////////////////////////////////////////////
1979 
1980 // Compressed pointers
1981 
1982 
1983 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
1984   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
1985 }
1986 
1987 
1988 // Blows src_klass.
1989 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
1990   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
1991 }
1992 
1993 
1994 
1995 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
1996   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
1997 }
1998 
1999 // Blows src and flags.
2000 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2001   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2002 }
2003 
2004 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2005   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2006 }
2007 
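     // With AS_RAW set, the access bypasses GC-specific barriers by calling the
     // base BarrierSetAssembler implementation directly; otherwise it dispatches
     // virtually to the active barrier set's assembler.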
2008 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2009                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2010   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2011   decorators = AccessInternal::decorator_fixup(decorators);
2012   bool as_raw = (decorators & AS_RAW) != 0;
2013   if (as_raw) {
2014     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2015   } else {
2016     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2017   }
2018 }
2019 
2020 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2021                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2022   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2023   decorators = AccessInternal::decorator_fixup(decorators);
2024   bool as_raw = (decorators & AS_RAW) != 0;
2025   if (as_raw) {
2026     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2027   } else {
2028     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2029   }
2030 }
2031 
2032 
2033 
2034 #ifdef COMPILER2
2035 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
2036 {
2037   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
2038 
2039   Register Rmark      = Rscratch2;
2040 
2041   assert(Roop != Rscratch, "");
2042   assert(Roop != Rmark, "");
2043   assert(Rbox != Rscratch, "");
2044   assert(Rbox != Rmark, "");
2045 
2046   Label fast_lock, done;
2047 
2048   if (UseBiasedLocking && !UseOptoBiasInlining) {
2049     Label failed;
2050     biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
2051     bind(failed);
2052   }
2053 
2054   ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
2055   tst(Rmark, markOopDesc::unlocked_value);
2056   b(fast_lock, ne);
2057 
2058   // Check for recursive lock
2059   // See comments in InterpreterMacroAssembler::lock_object for
2060   // explanations on the fast recursive locking check.
2061   // -1- test low 2 bits
2062   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
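       // lsl by 30 moves the two low (lock) bits to the top of the word, so eq
       // (Z set) means both are zero, i.e. the mark is a displaced-header pointer
       // into some thread's stack (the object is stack-locked).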
2063   // -2- test (hdr - SP) if the low two bits are 0
2064   sub(Rscratch, Rmark, SP, eq);
2065   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
2066   // If still 'eq' then recursive locking OK
2067   // set to zero if recursive lock, set to non-zero otherwise (see discussion in JDK-8153107)
2068   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
2069   b(done);
2070 
2071   bind(fast_lock);
2072   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
2073 
2074   bool allow_fallthrough_on_failure = true;
2075   bool one_shot = true;
2076   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
2077 
2078   bind(done);
2079 
2080 }
2081 
2082 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2)
2083 {
2084   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
2085 
2086   Register Rmark      = Rscratch2;
2087 
2088   assert(Roop != Rscratch, "");
2089   assert(Roop != Rmark, "");
2090   assert(Rbox != Rscratch, "");
2091   assert(Rbox != Rmark, "");
2092 
2093   Label done;
2094 
2095   if (UseBiasedLocking && !UseOptoBiasInlining) {
2096     biased_locking_exit(Roop, Rscratch, done);
2097   }
2098 
2099   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
2100   // If hdr is NULL, we've got recursive locking and there's nothing more to do
2101   cmp(Rmark, 0);
2102   b(done, eq);
2103 
2104   // Restore the object header
2105   bool allow_fallthrough_on_failure = true;
2106   bool one_shot = true;
2107   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
2108 
2109   bind(done);
2110 
2111 }
2112 #endif // COMPILER2