/*
 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interp_masm_aarch32.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "logging/log.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
#include "oops/method.hpp"
#include "oops/methodData.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.inline.hpp"

#include "vm_version_aarch32.hpp"
#include "register_aarch32.hpp"


// Implementation of InterpreterMacroAssembler

void InterpreterMacroAssembler::narrow(Register result) {
  // Get method->_constMethod->_result_type
  ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_method_offset() * wordSize));
  ldr(rscratch1, Address(rscratch1, Method::const_offset()));
  ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset()));

  Label done;

  // common case first

  cmp(rscratch1, T_INT);
  b(done, Assembler::EQ);

  // mask integer result to narrower return type.
  cmp(rscratch1, T_BOOLEAN);
  andr(result, result, 0x1, Assembler::EQ);

  cmp(rscratch1, T_BYTE);
  sxtb(result, result, Assembler::ror(), Assembler::EQ);

  cmp(rscratch1, T_CHAR);
  uxth(result, result, Assembler::ror(), Assembler::EQ);  // truncate upper 16 bits

  sxth(result, result, Assembler::ror(), Assembler::NE);  // sign-extend short

  // Nothing to do for T_INT
  bind(done);
}
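
// For reference, a minimal C++ sketch of the narrowing performed above
// (illustrative only; the generated code applies the same conversions to
// the 32-bit tosca register):
//
//   int32_t narrow_result(BasicType t, int32_t r) {
//     switch (t) {
//       case T_BOOLEAN: return r & 1;
//       case T_BYTE:    return (int8_t)r;    // sign-extend byte
//       case T_CHAR:    return (uint16_t)r;  // zero-extend char
//       case T_SHORT:   return (int16_t)r;   // sign-extend short
//       default:        return r;            // T_INT: unchanged
//     }
//   }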

void InterpreterMacroAssembler::jump_to_entry(address entry) {
  assert(entry, "Entry must have been generated by now");
  b(entry);
}

void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
  if (JvmtiExport::can_pop_frame()) {
    Label L;
    // Initiate popframe handling only if it is not already being
    // processed.  If the flag has the popframe_processing bit set, it
    // means that this code is called *during* popframe handling - we
    // don't want to reenter.
    // This method is only called just after the call into the vm in
    // call_VM_base, so the arg registers are available.
    ldr(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
    tst(rscratch1, JavaThread::popframe_pending_bit);
    b(L, Assembler::EQ);
    tst(rscratch1, JavaThread::popframe_processing_bit);
    b(L, Assembler::NE);
    // Call Interpreter::remove_activation_preserving_args_entry() to get the
    // address of the same-named entrypoint in the generated interpreter code.
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
    b(r0);
    bind(L);
  }
}


void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
  ldr(r2, Address(rthread, JavaThread::jvmti_thread_state_offset()));
  const Address tos_addr(r2, JvmtiThreadState::earlyret_tos_offset());
  const Address oop_addr(r2, JvmtiThreadState::earlyret_oop_offset());
  const Address val_addr(r2, JvmtiThreadState::earlyret_value_offset());
  switch (state) {
    case atos: ldr(r0, oop_addr);
               mov(rscratch1, 0);
               str(rscratch1, oop_addr);
               verify_oop(r0, state);               break;
    case dtos:
      if (hasFPU()) {
        vldr_f64(d0, val_addr);
        break;
      } // fall through otherwise
    case ltos: ldrd(r0, val_addr);                  break;
    case ftos:
      if (hasFPU()) {
        vldr_f32(d0, val_addr);
        break;
      } // fall through otherwise
    case btos:                                   // fall through
    case ztos:                                   // fall through
    case ctos:                                   // fall through
    case stos:                                   // fall through
    case itos: ldr(r0, val_addr);                   break;
    case vtos: /* nothing to do */                  break;
    default  : ShouldNotReachHere();
  }
  // Clean up tos value in the thread object
  mov(rscratch1, (int) ilgl);
  str(rscratch1, tos_addr);
  mov(rscratch1, 0);
  str(rscratch1, val_addr);
}


void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
  if (JvmtiExport::can_force_early_return()) {
    Label L;
    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
    cbz(rscratch1, L); // if (thread->jvmti_thread_state() == NULL) exit;

    // Initiate earlyret handling only if it is not already being processed.
    // If the flag has the earlyret_processing bit set, it means that this code
    // is called *during* earlyret handling - we don't want to reenter.
    ldr(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_state_offset()));
    cmp(rscratch1, JvmtiThreadState::earlyret_pending);
    b(L, Assembler::NE);

    // Call Interpreter::remove_activation_early_entry() to get the address of the
    // same-named entrypoint in the generated interpreter code.
    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
    ldr(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_tos_offset()));
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), rscratch1);
    b(r0);
    bind(L);
  }
}

void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
  Register reg,
  int bcp_offset) {
  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
  ldrh(reg, Address(rbcp, bcp_offset));
  rev16(reg, reg);
}

void InterpreterMacroAssembler::get_dispatch() {
  mov(rdispatch, ExternalAddress((address)Interpreter::dispatch_table()));
}

void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
                                                       int bcp_offset,
                                                       size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  if (index_size == sizeof(u2)) {
    load_unsigned_short(index, Address(rbcp, bcp_offset));
  } else if (index_size == sizeof(u4)) {
    // assert(EnableInvokeDynamic, "giant index used only for JSR 292");
    ldr(index, Address(rbcp, bcp_offset));
    // Check if the secondary index definition is still ~x, otherwise
    // we have to change the following assembler code to calculate the
    // plain index.
    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
    inv(index, index);  // convert to plain index
  } else if (index_size == sizeof(u1)) {
    load_unsigned_byte(index, Address(rbcp, bcp_offset));
  } else {
    ShouldNotReachHere();
  }
}
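
// For the u4 (invokedynamic) case the value stored at the bcp is the bitwise
// complement of the real index, so the decode step above amounts to this
// one-liner (illustrative sketch only):
//
//   jint decode_invokedynamic_index(jint raw) { return ~raw; }
//
// which is exactly what inv() computes and what the assert pins down.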

// Return
// Rindex: index into constant pool
// Rcache: address of cache entry - ConstantPoolCache::base_offset()
//
// A caller must add ConstantPoolCache::base_offset() to Rcache to get
// the true address of the cache entry.
//
void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
                                                           Register index,
                                                           int bcp_offset,
                                                           size_t index_size) {
  assert_different_registers(cache, index);
  assert_different_registers(cache, rcpool);
  get_cache_index_at_bcp(index, bcp_offset, index_size);
  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
  // convert from field index to ConstantPoolCacheEntry
  // aarch32 already has the cache in rcpool so there is no need to
  // install it in cache. Instead we pre-add the indexed offset to
  // rcpool and return it in cache. All clients of this method need to
  // be modified accordingly.
  add(cache, rcpool, index, lsl(exact_log2(4) + exact_log2(wordSize)));
}
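
// The address arithmetic above, as a C++ sketch (illustrative only): each
// ConstantPoolCacheEntry spans 4 words, so
//
//   cache = rcpool + index * (4 * wordSize);
//
// i.e. index shifted left by exact_log2(4) + exact_log2(wordSize) bits.
// ConstantPoolCache::base_offset() must still be added by the caller to get
// the true entry address.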


void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
                                                                        Register index,
                                                                        Register bytecode,
                                                                        int byte_no,
                                                                        int bcp_offset,
                                                                        size_t index_size) {
  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
  // We use a 32-bit load here since the layout of 64-bit words on
  // little-endian machines allows us to do so.
  // n.b. unlike x86 cache already includes the index offset
  ldr(bytecode, Address(cache,
                        ConstantPoolCache::base_offset()
                        + ConstantPoolCacheEntry::indices_offset()));
  const int shift_count = (1 + byte_no) * BitsPerByte;
  //ubfx(bytecode, bytecode, shift_count, BitsPerByte);
  assert(shift_count >= 0 && shift_count <= 24 && 0 == (shift_count & 7), "Invalid shift count");
  uxtb(bytecode, bytecode, ror(shift_count));
}
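
// The extraction above, sketched in C++ (illustrative only): byte_no selects
// byte 1 or 2 of the 32-bit indices word, so the resolved bytecode is
//
//   bytecode = (indices >> ((1 + byte_no) * BitsPerByte)) & 0xFF;
//
// The uxtb-with-rotate form computes this without a separate shift instruction.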

void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
                                                               Register tmp,
                                                               int bcp_offset,
                                                               size_t index_size) {
  assert(cache != tmp, "must use different register");
  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
  // convert from field index to ConstantPoolCacheEntry index
  // and from word offset to byte offset
  assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
  ldr(cache, Address(rfp, frame::get_interpreter_frame_cache_offset() * wordSize));
  // skip past the header
  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  add(cache, cache, tmp, lsl(2 + LogBytesPerWord));  // construct pointer to cache entry
}

void InterpreterMacroAssembler::get_method_counters(Register method,
                                                    Register mcs, Label& skip) {
  Label has_counters;
  ldr(mcs, Address(method, Method::method_counters_offset()));
  cbnz(mcs, has_counters);
  call_VM(noreg, CAST_FROM_FN_PTR(address,
          InterpreterRuntime::build_method_counters), method);
  ldr(mcs, Address(method, Method::method_counters_offset()));
  cbz(mcs, skip); // No MethodCounters allocated, OutOfMemory
  bind(has_counters);
}

// Load object from cpool->resolved_references(index)
void InterpreterMacroAssembler::load_resolved_reference_at_index(
                                           Register result, Register index, Register tmp) {
  assert_different_registers(result, index);
  // convert from field index to resolved_references() index and from
  // word index to byte offset. Since this is a java object, it can be compressed

  get_constant_pool(result);
  // load pointer for resolved_references[] objArray
  ldr(result, Address(result, ConstantPool::cache_offset_in_bytes()));
  ldr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
  resolve_oop_handle(result, tmp);
  // Add in the index
  add(result, result, index, lsl(LogBytesPerHeapOop));
  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}

void InterpreterMacroAssembler::load_resolved_klass_at_offset(
                             Register cpool, Register index, Register klass, Register temp) {
  add(temp, cpool, index, lsl(LogBytesPerWord));
  ldrh(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index
  ldr(klass, Address(cpool,  ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses
  add(klass, klass, temp, lsl(LogBytesPerWord));
  ldr(klass, Address(klass, Array<Klass*>::base_offset_in_bytes()));
}
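
// The two-level lookup above, as a C++ sketch (pointer arithmetic spelled out
// for illustration only):
//
//   // the u2 resolved_klass_index lives in the cp slot just past the header
//   u2 rki = *(u2*)((address)cpool + index * wordSize + sizeof(ConstantPool));
//   klass  = cpool->_resolved_klasses->at(rki);  // Array<Klass*> side table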


// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
// subtype of super_klass.
//
// Args:
//      r0: superklass
//      Rsub_klass: subklass
//
// Kills:
//      r2, r5
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
                                                  Label& ok_is_subtype) {
  assert(Rsub_klass != r0, "r0 holds superklass");
  assert(Rsub_klass != r2, "r2 holds 2ndary super array length");
  assert(Rsub_klass != r14, "r14 holds 2ndary super array scan ptr");

  // Profile the not-null value's klass.
  profile_typecheck(r2, Rsub_klass, r14); // blows r2

  // Do the check.
  check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2

  // Profile the failure of the check.
  profile_typecheck_failed(r2); // blows r2
}

// Java Expression Stack

void InterpreterMacroAssembler::pop_ptr(Register r) {
  ldr(r, post(sp, wordSize));
}

void InterpreterMacroAssembler::pop_i(Register r) {
  ldr(r, post(sp, wordSize));
}

void InterpreterMacroAssembler::pop_l(Register rLo, Register rHi) {
  assert(rHi->encoding() == rLo->encoding() + 1, "must use two consecutive registers");
  ldrd(rLo, post(sp, 2 * Interpreter::stackElementSize));
}

void InterpreterMacroAssembler::push_ptr(Register r) {
  str(r, pre(sp, -wordSize));
}

void InterpreterMacroAssembler::push_i(Register r) {
  str(r, pre(sp, -wordSize));
}

void InterpreterMacroAssembler::push_l(Register rLo, Register rHi) {
  assert(rHi->encoding() == rLo->encoding() + 1, "must use two consecutive registers");
  strd(rLo, pre(sp, -2 * wordSize));
}

void InterpreterMacroAssembler::pop_f(FloatRegister r) {
  vldmia_f32(sp, FloatRegSet(r).bits());
}

void InterpreterMacroAssembler::pop_d(FloatRegister r) {
  assert(is_even(r->encoding()), "not double!");
  vldmia_f64(sp, DoubleFloatRegSet(r).bits());
}

void InterpreterMacroAssembler::push_f(FloatRegister r) {
  vstmdb_f32(sp, FloatRegSet(r).bits());
}

void InterpreterMacroAssembler::push_d(FloatRegister r) {
  assert(is_even(r->encoding()), "not double!");
  vstmdb_f64(sp, DoubleFloatRegSet(r).bits());
}

void InterpreterMacroAssembler::pop(TosState state) {
  switch (state) {
  case atos: pop_ptr();                 break;
  case btos:
  case ztos:
  case ctos:
  case stos:
  case itos: pop_i();                   break;
  case ltos: pop_l();                   break;
  case ftos:
    if (hasFPU()) {
      pop_f();
    } else {
      pop_i();
    }
    break;
  case dtos:
    if (hasFPU()) {
      pop_d();
    } else {
      pop_l();
    }
    break;
  case vtos: /* nothing to do */        break;
  default:   ShouldNotReachHere();
  }
  verify_oop(r0, state);
}

void InterpreterMacroAssembler::push(TosState state) {
  verify_oop(r0, state);
  switch (state) {
  case atos: push_ptr();                break;
  case btos:
  case ztos:
  case ctos:
  case stos:
  case itos: push_i();                  break;
  case ltos: push_l();                  break;
  case ftos:
    if (hasFPU()) {
      push_f();
    } else {
      push_i();
    }
    break;
  case dtos:
    if (hasFPU()) {
      push_d();
    } else {
      push_l();
    }
    break;
  case vtos: /* nothing to do */        break;
  default  : ShouldNotReachHere();
  }
}

// Helpers for swap and dup
void InterpreterMacroAssembler::load_ptr(int n, Register val) {
  ldr(val, Address(sp, Interpreter::expr_offset_in_bytes(n)));
}

void InterpreterMacroAssembler::store_ptr(int n, Register val) {
  str(val, Address(sp, Interpreter::expr_offset_in_bytes(n)));
}

// Load ftos/dtos from given address
void InterpreterMacroAssembler::load_float(Address src) {
  if (hasFPU()) {
    vldr_f32(f0, src);
  } else {
    ldr(r0, src);
  }
}

void InterpreterMacroAssembler::load_double(Address src) {
  if (hasFPU()) {
    vldr_f64(d0, src);
  } else {
    ldrd(r0, r1, src);
  }
}

void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
  // set sender sp
  mov(r4, sp);
  // record last_sp
  str(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
}

void print_method_name(Method* m, const char* msg) {
  if (MacroAssembler::enable_debug) {
    printf("%s", msg);
    fflush(stdout);
    m->print_short_name();
    printf("\n");
    fflush(stdout);
  }
}

// Jump to from_interpreted entry of a call unless single stepping is possible
// in this thread in which case we must call the i2i entry
void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
  prepare_to_jump_from_interpreted();

  if (JvmtiExport::can_post_interpreter_events()) {
    Label run_compiled_code;
    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
    // compiled code in threads for which the event is enabled.  Check here for
    // interp_only_mode if these events CAN be enabled.
    // interp_only is an int, on little endian it is sufficient to test the byte only
    // Is a cmpl faster?
    ldr(temp, Address(rthread, JavaThread::interp_only_mode_offset()));
    cbz(temp, run_compiled_code);
    ldr(temp, Address(method, Method::interpreter_entry_offset()));
    b(temp);
    bind(run_compiled_code);
  }

  ldr(temp, Address(method, Method::from_interpreted_offset()));
  b(temp);
}

// The following two routines provide a hook so that an implementation
// can schedule the dispatch in two parts.  amd64 does not do this.
void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
}

void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
  dispatch_next(state, step);
}

void InterpreterMacroAssembler::dispatch_base(TosState state,
                                              address* table,
                                              bool verifyoop,
                                              bool generate_poll) {
  if (VerifyActivationFrameSize) {
    Unimplemented();
  }
  if (verifyoop) {
    verify_oop(r0, state);
  }

  /* Debugging code */
  bytecode_seen(rscratch1, r3);

  /*{
    Label skip;

    mov(r3, (address)&MacroAssembler::bytecodes_executed);
    ldr(r2, r3);
    add(r2, r2, 1);
    str(r2, r3);
    // Print out every 16384 (needs to be a power of two).
    mov(r3, 16384 - 1);
    tst(r2, r3);
    b(skip, Assembler::NE);
    reg_printf_important("Executed %d bytecodes.\n", r2);
    bind(skip);
  }*/


  /*mov(r3, (address)&MacroAssembler::bytecodes_until_print);
  ldr(r2, Address(r3));
  cmp(r2, 0);

  sub(r2, r2, 1, Assembler::NE);
  str(r2, Address(r3), Assembler::NE);

  mov(r2, 1, Assembler::EQ);
  mov(r3, (address)&MacroAssembler::enable_debug, Assembler::EQ);
  str(r2, Address(r3), Assembler::EQ);

  mov(r3, (address)&MacroAssembler::enable_method_debug, Assembler::EQ);
  str(r2, Address(r3), Assembler::EQ);*/

  /*Label end;
  cmp(r2, 0);
  b(end, Assembler::NE);
  stop("got to end of bytecodes");
  bind(end);*/

  get_bytecode(r14, rscratch1);
  reg_printf("Dispatching bytecode %s (%d) @ BCP = %p\n", r14, rscratch1, rbcp);
  /* End debugging code */

  Label safepoint;
  address* const safepoint_table = Interpreter::safept_table(state);
  bool needs_thread_local_poll = generate_poll &&
    SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;

  if (needs_thread_local_poll) {
    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
    ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
    tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
  }

  if (table == Interpreter::dispatch_table(state)) {
    add(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
    ldr(r15_pc, Address(rdispatch, rscratch2, lsl(2)));
  } else {
    mov(rscratch2, (address)table);
    ldr(r15_pc, Address(rscratch2, rscratch1, lsl(2)));
  }

  if (needs_thread_local_poll) {
    bind(safepoint);
    lea(rscratch2, ExternalAddress((address)safepoint_table));
    ldr(r15_pc, Address(rscratch2, rscratch1, lsl(2)));
  }
}

void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
}

void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
  dispatch_base(state, Interpreter::normal_table(state));
}

void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
  dispatch_base(state, Interpreter::normal_table(state), false);
}


void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
  // load next bytecode
  ldrb(rscratch1, Address(pre(rbcp, step)));
  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
}

void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
  // load current bytecode
  ldrb(rscratch1, Address(rbcp, 0));
  dispatch_base(state, table);
}

// remove activation
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
//
// If there are locked Java monitors
//    If throw_monitor_exception
//       throws IllegalMonitorStateException
//    Else if install_monitor_exception
//       installs IllegalMonitorStateException
//    Else
//       no error processing
void InterpreterMacroAssembler::remove_activation(
        TosState state,
        bool throw_monitor_exception,
        bool install_monitor_exception,
        bool notify_jvmdi) {
  // Note: registers r3 and d0 may be in use for the
  // result check if this is a synchronized method
  Label unlocked, unlock, no_unlock;

  // get the value of _do_not_unlock_if_synchronized into r3
  const Address do_not_unlock_if_synchronized(rthread,
    in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
  ldrb(r3, do_not_unlock_if_synchronized);
  mov(rscratch1, 0);
  strb(rscratch1, do_not_unlock_if_synchronized); // reset the flag

  // get method access flags
  ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_method_offset() * wordSize));
  ldr(r2, Address(rscratch1, Method::access_flags_offset()));
  tst(r2, JVM_ACC_SYNCHRONIZED);
  b(unlocked, Assembler::EQ);

  // Don't unlock anything if the _do_not_unlock_if_synchronized flag
  // is set.
  cbnz(r3, no_unlock);

  // unlock monitor
  push(state); // save result

  // BasicObjectLock will be first in list, since this is a
  // synchronized method. However, need to check that the object has
  // not been unlocked by an explicit monitorexit bytecode.
  const Address monitor(rfp, frame::get_interpreter_frame_initial_sp_offset() *
                        wordSize - (int) sizeof(BasicObjectLock));
  // We use c_rarg1 so that if we go slow path it will be the correct
  // register for unlock_object to pass to VM directly
  lea(c_rarg1, monitor); // address of first monitor

  ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
  cbnz(r0, unlock);

  pop(state);
  if (throw_monitor_exception) {
    // Entry already unlocked, need to throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address,
                   InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Monitor already unlocked during a stack unroll. If requested,
    // install an illegal_monitor_state_exception.  Continue with
    // stack unrolling.
    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address,
                     InterpreterRuntime::new_illegal_monitor_state_exception));
    }
    b(unlocked);
  }

  bind(unlock);
  unlock_object(c_rarg1);
  pop(state);

  // Check for block-structured locking (i.e., that all locked
  // objects have been unlocked)
  bind(unlocked);

  // r0: Might contain return value
  // FIXME r1 : Might contain the value too

  // Check that all monitors are unlocked
  {
    Label loop, exception, entry, restart;
    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
    const Address monitor_block_top(
        rfp, frame::get_interpreter_frame_monitor_block_top_offset() * wordSize);
    const Address monitor_block_bot(
        rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize);

    bind(restart);
    // We can't use c_rarg1 as it might contain a result
    ldr(c_rarg2, monitor_block_top); // points to current entry, starting
                                     // with top-most entry
    lea(r14, monitor_block_bot);  // points to word before bottom of
                                  // monitor block
    b(entry);

    // Entry already locked, need to throw exception
    bind(exception);

    if (throw_monitor_exception) {
      // Throw exception
      MacroAssembler::call_VM(noreg,
                              CAST_FROM_FN_PTR(address, InterpreterRuntime::
                                   throw_illegal_monitor_state_exception));
      should_not_reach_here();
    } else {
      // Stack unrolling. Unlock object and install illegal_monitor_exception.
      // Unlock does not block, so don't have to worry about the frame.
      // We don't have to preserve c_rarg1 since we are going to throw an exception.

      push(state);
      mov(c_rarg1, c_rarg2);
      unlock_object(c_rarg1);
      pop(state);

      if (install_monitor_exception) {
        call_VM(noreg, CAST_FROM_FN_PTR(address,
                                        InterpreterRuntime::
                                        new_illegal_monitor_state_exception));
      }

      b(restart);
    }

    bind(loop);
    // check if current entry is used
    ldr(rscratch1, Address(c_rarg2, BasicObjectLock::obj_offset_in_bytes()));
    cbnz(rscratch1, exception);

    add(c_rarg2, c_rarg2, entry_size); // otherwise advance to next entry
    bind(entry);
    cmp(c_rarg2, r14); // check if bottom reached
    b(loop, Assembler::NE); // if not at bottom then check this entry
  }

  bind(no_unlock);

  // jvmti support
  if (notify_jvmdi) {
    notify_method_exit(state, NotifyJVMTI);    // preserve TOSCA
  } else {
    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
  }

  if (StackReservedPages > 0) {
    // testing if reserved zone needs to be re-enabled
    Label no_reserved_zone_enabling;

    ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset()));
    cmp(sp, rscratch1);
    b(no_reserved_zone_enabling, Assembler::LS);

    call_VM_leaf(
      CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread);
    call_VM(noreg, CAST_FROM_FN_PTR(address,
                   InterpreterRuntime::throw_delayed_StackOverflowError));
    should_not_reach_here();

    bind(no_reserved_zone_enabling);
  }

  // remove activation
  // get sender sp
  ldr(rscratch1,
      Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize));
  // remove frame anchor
  leave();
  // If we're returning to interpreted code we will shortly be
  // adjusting SP to allow some space for ESP.  If we're returning to
  // compiled code the saved sender SP was saved in sender_sp, so this
  // restores it.
  //bic(sp, rscratch1, 0xf); changed to not drop it as this is the sp
  mov(sp, rscratch1);
}

// Lock object
//
// Args:
//      c_rarg1: BasicObjectLock to be used for locking
//
// Kills:
//      r0
//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
//      rscratch1, rscratch2 (scratch regs)
void InterpreterMacroAssembler::lock_object(Register lock_reg)
{
  reg_printf("LOCK:\n");
  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
  if (UseHeavyMonitors) {
    call_VM(noreg,
            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
            lock_reg);
  } else {
    Label done;

    const Register swap_reg = r0;
    const Register obj_reg = c_rarg3; // Will contain the oop

    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes();
    const int mark_offset = lock_offset +
                            BasicLock::displaced_header_offset_in_bytes();

    Label slow_case;

    // Load object pointer into obj_reg %c_rarg3
    ldr(obj_reg, Address(lock_reg, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_enter(obj_reg, swap_reg, rscratch2, rscratch1, false, done, &slow_case);
    }

    // Load (object->mark() | 1) into swap_reg
    ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
    orr(swap_reg, rscratch1, 1);

    // Save (object->mark() | 1) into BasicLock's displaced header
    str(swap_reg, Address(lock_reg, mark_offset));

    assert(lock_offset == 0,
           "displaced header must be first word in BasicObjectLock");

    Label fail;
    if (PrintBiasedLockingStatistics) {
      Label fast;
      cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
      bind(fast);
      atomic_inc(Address((address)BiasedLocking::fast_path_entry_count_addr()),
                  rscratch2, rscratch1);
      b(done);
      bind(fail);
    } else {
      cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
    }

    // Test if the oopMark is an obvious stack pointer, i.e.,
    //  1) (mark & 7) == 0, and
    //  2) rsp <= mark < mark + os::pagesize()
    //
    // These 3 tests can be done by evaluating the following
    // expression: ((mark - rsp) & (7 - os::vm_page_size())),
    // assuming both stack pointer and pagesize have their
    // least significant 3 bits clear.
    // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg
    // NOTE2: aarch32 does not like to subtract sp from rn so take a
    // copy
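
    // As a C++ sketch, the stack-lock test emitted below is (illustrative
    // only):
    //
    //   uintptr_t delta = mark - sp;                        // sub
    //   uintptr_t mask  = (os::vm_page_size() - 1) & ~0b11;
    //   bool recursive  = (delta & ~mask) == 0;             // bics sets Z
    //
    // i.e. the mark is a word-aligned stack address at most a page above sp,
    // so this is a recursive enter and zero is stored as the displaced header.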


    //mov(rscratch1, sp);
    //sub(swap_reg, swap_reg, rscratch1);
    //ands(swap_reg, swap_reg, (unsigned long)(7 - os::vm_page_size()));
    sub(swap_reg, swap_reg, sp);
    mov(rscratch1, (os::vm_page_size() - 1) & ~0b11);
    bics(swap_reg, swap_reg, rscratch1);

    // Save the test result, for recursive case, the result is zero
    str(swap_reg, Address(lock_reg, mark_offset));

    if (PrintBiasedLockingStatistics) {
      b(slow_case, Assembler::NE);
      atomic_inc(Address((address)BiasedLocking::fast_path_entry_count_addr()),
                  rscratch2, rscratch1);
    }
    b(done, Assembler::EQ);

    bind(slow_case);

    // Call the runtime routine for slow case
    call_VM(noreg,
            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
            lock_reg);

    bind(done);
  }
}


// Unlocks an object. Used in monitorexit bytecode and
// remove_activation.  Throws an IllegalMonitorException if object is
// not locked by current thread.
//
// Args:
//      c_rarg1: BasicObjectLock for lock
//
// Kills:
//      r0
//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
//      rscratch1, rscratch2 (scratch regs)
void InterpreterMacroAssembler::unlock_object(Register lock_reg)
{
  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");

  reg_printf("UNLOCK:\n");
  if (UseHeavyMonitors) {
    call_VM(noreg,
            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
            lock_reg);
  } else {
    Label done;

    //create_breakpoint();
    const Register swap_reg   = c_rarg0;
    const Register header_reg = c_rarg2;  // Will contain the old oopMark
    const Register obj_reg    = c_rarg3;  // Will contain the oop

    save_bcp(); // Save in case of exception

    // Convert from BasicObjectLock structure to object and BasicLock
    // structure. Store the BasicLock address into %r0
    lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));

    // Load oop into obj_reg(%c_rarg3)
    ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));

    // Free entry
    mov(rscratch2, 0);
    str(rscratch2, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));

    if (UseBiasedLocking) {
      biased_locking_exit(obj_reg, header_reg, done);
    }

    // Load the old header from BasicLock structure
    ldr(header_reg, Address(swap_reg,
                            BasicLock::displaced_header_offset_in_bytes()));

    // Test for recursion
    cbz(header_reg, done);

    // Atomic swap back the old header
    cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);

    // Call the runtime routine for slow case.
    str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
    call_VM(noreg,
            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
            lock_reg);

    bind(done);

    restore_bcp();
  }
}

void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
                                                         Label& zero_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  ldr(mdp, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
  cbz(mdp, zero_continue);
}

// Set the method data pointer for the current bcp.
void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
  assert(ProfileInterpreter, "must be profiling interpreter");
  Label set_mdp;
  strd(r0, r1, Address(pre(sp, -2 * wordSize)));

  // Test MDO to avoid the call if it is NULL.
  ldr(r0, Address(rmethod, in_bytes(Method::method_data_offset())));
  cbz(r0, set_mdp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rmethod, rbcp);
  // r0: mdi
  // mdo is guaranteed to be non-zero here, we checked for it before the call.
  ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset())));
  lea(r1, Address(r1, in_bytes(MethodData::data_offset())));
  add(r0, r1, r0);
  str(r0, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
  bind(set_mdp);
  ldrd(r0, r1, Address(post(sp, 2 * wordSize)));
}

void InterpreterMacroAssembler::verify_method_data_pointer() {
  assert(ProfileInterpreter, "must be profiling interpreter");
#ifdef ASSERT
  Label verify_continue;
  strd(r0, r1, Address(pre(sp, -2 * wordSize)));
  strd(r2, r3, Address(pre(sp, -2 * wordSize)));
  test_method_data_pointer(r3, verify_continue); // If mdp is zero, continue
  get_method(r1);

  // If the mdp is valid, it will point to a DataLayout header which is
  // consistent with the bcp.  The converse is highly probable also.
  ldrsh(r2, Address(r3, in_bytes(DataLayout::bci_offset())));
  ldr(rscratch1, Address(r1, Method::const_offset()));
  add(r2, r2, rscratch1);
  lea(r2, Address(r2, ConstMethod::codes_offset()));
  cmp(r2, rbcp);
  b(verify_continue, Assembler::EQ);
  // r1: method
  // rbcp: bcp // rbcp == 22
  // r3: mdp
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
               r1, rbcp, r3);
  bind(verify_continue);
  ldrd(r2, r3, Address(post(sp, 2 * wordSize)));
  ldrd(r0, r1, Address(post(sp, 2 * wordSize)));
#endif // ASSERT
}


void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
                                                int constant,
                                                Register value) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  Address data(mdp_in, constant);
  str(value, data);
}


void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
                                                      int constant,
                                                      bool decrement) {
  increment_mdp_data_at(mdp_in, noreg, constant, decrement);
}

void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
                                                      Register reg,
                                                      int constant,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  // %%% this does 64bit counters; at best it is wasting space,
  // at worst it is a rare bug when counters overflow

  assert_different_registers(rscratch2, rscratch1, mdp_in, reg);

  Address addr1(mdp_in, constant);
  Address addr2(rscratch2, reg, lsl(0));
  Address &addr = addr1;
  if (reg != noreg) {
    lea(rscratch2, addr1);
    addr = addr2;
  }

  if (decrement) {
    // Decrement the register.  Set condition codes.
    // Intel does this
    // addptr(data, (int32_t) -DataLayout::counter_increment);
    // If the decrement causes the counter to overflow, stay negative
    // Label L;
    // jcc(Assembler::negative, L);
    // addptr(data, (int32_t) DataLayout::counter_increment);
    // so we do this
    ldr(rscratch1, addr);
    subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment);
    Label L;
    b(L, Assembler::LO);       // skip store if counter underflow
    str(rscratch1, addr);
    bind(L);
  } else {
    assert(DataLayout::counter_increment == 1,
           "flow-free idiom only works with 1");
    // Intel does this
    // Increment the register.  Set carry flag.
    // addptr(data, DataLayout::counter_increment);
    // If the increment causes the counter to overflow, pull back by 1.
    // sbbptr(data, (int32_t)0);
    // so we do this
    ldr(rscratch1, addr);
    adds(rscratch1, rscratch1, DataLayout::counter_increment);
    Label L;
    b(L, Assembler::CS);       // skip store if counter overflow
    str(rscratch1, addr);
    bind(L);
  }
}
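
// The saturating counter update above, as a C++ sketch (illustrative only):
//
//   uintptr_t c = *counter;
//   if (decrement) {
//     if (c >= DataLayout::counter_increment) *counter = c - DataLayout::counter_increment;
//   } else {
//     uintptr_t inc = c + DataLayout::counter_increment;
//     if (inc >= c) *counter = inc;  // skip the store on unsigned wrap-around
//   }
//
// The LO/CS conditional branches implement the "skip the store" behaviour
// without any extra control flow.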

void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
                                                int flag_byte_constant) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  int flags_offset = in_bytes(DataLayout::flags_offset());
  // Set the flag
  ldrb(rscratch1, Address(mdp_in, flags_offset));
  orr(rscratch1, rscratch1, flag_byte_constant);
  strb(rscratch1, Address(mdp_in, flags_offset));
}


void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
                                                 int offset,
                                                 Register value,
                                                 Register test_value_out,
                                                 Label& not_equal_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  if (test_value_out == noreg) {
    ldr(rscratch1, Address(mdp_in, offset));
    cmp(value, rscratch1);
  } else {
    // Put the test value into a register, so caller can use it:
    ldr(test_value_out, Address(mdp_in, offset));
    cmp(value, test_value_out);
  }
  b(not_equal_continue, Assembler::NE);
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
                                                     int offset_of_disp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  ldr(rscratch1, Address(mdp_in, offset_of_disp));
  add(mdp_in, mdp_in, rscratch1);
  str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
                                                     Register reg,
                                                     int offset_of_disp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  lea(rscratch1, Address(mdp_in, offset_of_disp));
  ldr(rscratch1, Address(rscratch1, reg, lsl()));
  add(mdp_in, mdp_in, rscratch1);
  str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
                                                       int constant) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  add(mdp_in, mdp_in, (unsigned) constant);
  str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
}


void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  // save/restore across call_VM
  mov(rscratch1, 0);
  strd(rscratch1, return_bci, Address(pre(sp, -2 * wordSize)));
  call_VM(noreg,
          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
          return_bci);
  ldrd(rscratch1, return_bci, Address(post(sp, 2 * wordSize)));
}


void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
                                                     Register bumped_count) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    // Otherwise, assign to mdp
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch.  Increment the taken count.
    // We inline increment_mdp_data_at to return bumped_count in a register
    //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
    Address data(mdp, in_bytes(JumpData::taken_offset()));
    ldr(bumped_count, data);
    assert(DataLayout::counter_increment == 1,
           "flow-free idiom only works with 1");
    // Intel does this to catch overflow
    // addptr(bumped_count, DataLayout::counter_increment);
    // sbbptr(bumped_count, 0);
    // so we do this
    adds(bumped_count, bumped_count, DataLayout::counter_increment);
    Label L;
    b(L, Assembler::CS);       // skip store if counter overflow
    str(bumped_count, data);
    bind(L);
    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch.  Increment the not taken count.
    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));

    // The method data pointer needs to be updated to correspond to
    // the next bytecode
    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::profile_call(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call.  Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_final_call(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call.  Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp,
                           in_bytes(VirtualCallData::
                                    virtual_call_data_size()));
    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
                                                     Register mdp,
                                                     Register reg2,
                                                     bool receiver_can_be_null) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    Label skip_receiver_profile;
    if (receiver_can_be_null) {
      Label not_null;
      cbnz(receiver, not_null);
      // We are making a call.  Increment the count for null receiver.
      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
      b(skip_receiver_profile);
      bind(not_null);
    }

    // Record the receiver type.
    record_klass_in_profile(receiver, mdp, reg2, true);
    bind(skip_receiver_profile);

    // The method data pointer needs to be updated to reflect the new target.
#if INCLUDE_JVMCI
    if (MethodProfileWidth == 0) {
      update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
    }
#else // INCLUDE_JVMCI
    update_mdp_by_constant(mdp,
                           in_bytes(VirtualCallData::
                                    virtual_call_data_size()));
#endif // INCLUDE_JVMCI
    bind(profile_continue);
  }
}

#if INCLUDE_JVMCI
void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) {
  assert_different_registers(method, mdp, reg2);
  if (ProfileInterpreter && MethodProfileWidth > 0) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    Label done;
    record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth,
      &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
    bind(done);

    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
    bind(profile_continue);
  }
}
#endif // INCLUDE_JVMCI

// This routine creates a state machine for updating the multi-row
// type profile at a virtual call site (or other type-sensitive bytecode).
// The machine visits each row (of receiver/count) until the receiver type
// is found, or until it runs out of rows.  At the same time, it remembers
// the location of the first empty row.  (An empty row records null for its
// receiver, and can be allocated for a newly-observed receiver type.)
// Because there are two degrees of freedom in the state, a simple linear
// search will not work; it must be a decision tree.  Hence this helper
// function is recursive, to generate the required tree structured code.
// It's the interpreter, so we are trading off code space for speed.
// See below for example code.
void InterpreterMacroAssembler::record_klass_in_profile_helper(
                                        Register receiver, Register mdp,
                                        Register reg2, int start_row,
                                        Label& done, bool is_virtual_call) {
  if (TypeProfileWidth == 0) {
    if (is_virtual_call) {
      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
    }
#if INCLUDE_JVMCI
    else if (EnableJVMCI) {
      increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()));
    }
#endif // INCLUDE_JVMCI
  } else {
    int non_profiled_offset = -1;
    if (is_virtual_call) {
      non_profiled_offset = in_bytes(CounterData::count_offset());
    }
#if INCLUDE_JVMCI
    else if (EnableJVMCI) {
      non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
    }
#endif // INCLUDE_JVMCI

    record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
        &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
  }
}

void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
                                        Register reg2, int start_row, Label& done, int total_rows,
                                        OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
                                        int non_profiled_offset) {
  int last_row = total_rows - 1;
  assert(start_row <= last_row, "must be work left to do");
  // Test this row for both the item and for null.
  // Take any of three different outcomes:
  //   1. found item => increment count and goto done
  //   2. found null => keep looking for case 1, maybe allocate this cell
  //   3. found something else => keep looking for cases 1 and 2
  // Case 3 is handled by a recursive call.
  for (int row = start_row; row <= last_row; row++) {
    Label next_test;
    bool test_for_null_also = (row == start_row);

    // See if the item is item[n].
    int item_offset = in_bytes(item_offset_fn(row));
    test_mdp_data_at(mdp, item_offset, item,
                     (test_for_null_also ? reg2 : noreg),
                     next_test);
    // (Reg2 now contains the item from the CallData.)

    // The item is item[n].  Increment count[n].
    int count_offset = in_bytes(item_count_offset_fn(row));
    increment_mdp_data_at(mdp, count_offset);
    b(done);
    bind(next_test);

    if (test_for_null_also) {
      Label found_null;
      // Failed the equality check on item[n]...  Test for null.
      if (start_row == last_row) {
        // The only thing left to do is handle the null case.
        if (non_profiled_offset >= 0) {
          cbz(reg2, found_null);
          // Item did not match any saved item and there is no empty row for it.
          // Increment total counter to indicate polymorphic case.
          increment_mdp_data_at(mdp, non_profiled_offset);
          b(done);
          bind(found_null);
        } else {
          cbnz(reg2, done);
        }
        break;
      }
      // Since null is rare, make it be the branch-taken case.
      cbz(reg2, found_null);

      // Put all the "Case 3" tests here.
      record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
        item_offset_fn, item_count_offset_fn, non_profiled_offset);

      // Found a null.  Keep searching for a matching item,
      // but remember that this is an empty (unused) slot.
      bind(found_null);
    }
  }

1389   // In the fall-through case, we found no matching item, but we
1390   // observed the item[start_row] is NULL.
1391 
1392   // Fill in the item field and increment the count.
1393   int item_offset = in_bytes(item_offset_fn(start_row));
1394   set_mdp_data_at(mdp, item_offset, item);
1395   int count_offset = in_bytes(item_count_offset_fn(start_row));
1396   mov(reg2, DataLayout::counter_increment);
1397   set_mdp_data_at(mdp, count_offset, reg2);
1398   if (start_row > 0) {
1399     b(done);
1400   }
1401 }
1402 
1403 // Example state machine code for three profile rows:
1404 //   // main copy of decision tree, rooted at row[1]
1405 //   if (row[0].rec == rec) { row[0].incr(); goto done; }
1406 //   if (row[0].rec != NULL) {
1407 //     // inner copy of decision tree, rooted at row[1]
1408 //     if (row[1].rec == rec) { row[1].incr(); goto done; }
1409 //     if (row[1].rec != NULL) {
1410 //       // degenerate decision tree, rooted at row[2]
1411 //       if (row[2].rec == rec) { row[2].incr(); goto done; }
1412 //       if (row[2].rec != NULL) { count.incr(); goto done; } // overflow
1413 //       row[2].init(rec); goto done;
1414 //     } else {
1415 //       // remember row[1] is empty
1416 //       if (row[2].rec == rec) { row[2].incr(); goto done; }
1417 //       row[1].init(rec); goto done;
1418 //     }
1419 //   } else {
1420 //     // remember row[0] is empty
1421 //     if (row[1].rec == rec) { row[1].incr(); goto done; }
1422 //     if (row[2].rec == rec) { row[2].incr(); goto done; }
1423 //     row[0].init(rec); goto done;
1424 //   }
1425 //   done:
1426 
1427 void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
1428                                                         Register mdp, Register reg2,
1429                                                         bool is_virtual_call) {
1430   assert(ProfileInterpreter, "must be profiling");
1431   Label done;
1432 
1433   record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call);
1434 
1435   bind (done);
1436 }
1437 
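// A rough C-like sketch of the profile update generated below (illustrative
// only; `rows` stands for RetData::row_limit() and the fields live in the
// RetData entry that mdp currently points at):
//   count++;
//   for (row = 0; row < rows; row++) {
//     if (bci[row] == return_bci)
//       { bci_count[row]++; mdp += bci_displacement[row]; goto done; }
//   }
//   update_mdp_for_ret(return_bci);  // fallback, handled out of line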
void InterpreterMacroAssembler::profile_ret(Register return_bci,
                                            Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the total ret count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));

    for (uint row = 0; row < RetData::row_limit(); row++) {
      Label next_test;

      // See if return_bci is equal to bci[n]:
      test_mdp_data_at(mdp,
                       in_bytes(RetData::bci_offset(row)),
                       return_bci, noreg,
                       next_test);

      // return_bci is equal to bci[n].  Increment the count.
      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));

      // The method data pointer needs to be updated to reflect the new target.
      update_mdp_by_offset(mdp,
                           in_bytes(RetData::bci_displacement_offset(row)));
      b(profile_continue);
      bind(next_test);
    }

    update_mdp_for_ret(return_bci);

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
  if (ProfileInterpreter && TypeProfileCasts) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    int count_offset = in_bytes(CounterData::count_offset());
    // Back up the address, since we have already bumped the mdp.
    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());

    // *Decrement* the counter.  We expect to see zero or small negatives.
    increment_mdp_data_at(mdp, count_offset, true);

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

      // Record the object type.
      record_klass_in_profile(klass, mdp, reg2, false);
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the default case count.
    increment_mdp_data_at(mdp,
                          in_bytes(MultiBranchData::default_count_offset()));

    // The method data pointer needs to be updated.
    update_mdp_by_offset(mdp,
                         in_bytes(MultiBranchData::
                                  default_displacement_offset()));

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_switch_case(Register index,
                                                    Register mdp,
                                                    Register reg2) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Build the base: (index * per_case_size()) + case_array_offset().
    mov(reg2, in_bytes(MultiBranchData::per_case_size()));
    mov(rscratch1, in_bytes(MultiBranchData::case_array_offset()));
    Assembler::mla(index, index, reg2, rscratch1);

    // Update the case count.
    increment_mdp_data_at(mdp,
                          index,
                          in_bytes(MultiBranchData::relative_count_offset()));

    // The method data pointer needs to be updated.
    update_mdp_by_offset(mdp,
                         index,
                         in_bytes(MultiBranchData::
                                  relative_displacement_offset()));

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
  if (state == atos) {
    MacroAssembler::verify_oop(reg);
  }
}

void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; }


void InterpreterMacroAssembler::notify_method_entry() {
  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent
  // to track stack depth.  If it is possible to enter interp_only_mode we add
  // code to check whether the event should be sent.
  if (JvmtiExport::can_post_interpreter_events()) {
    Label L;
    ldr(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
    cbz(r3, L);
    call_VM(noreg, CAST_FROM_FN_PTR(address,
                                    InterpreterRuntime::post_method_entry));
    bind(L);
  }

#ifdef DTRACE_ENABLED
  {
    SkipIfEqual skip(this, &DTraceMethodProbes, false);
    get_method(c_rarg1);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
                 rthread, c_rarg1);
  }
#endif

  // RedefineClasses() tracing support for obsolete method entry
  if (log_is_enabled(Trace, redefine, class, obsolete)) {
    get_method(c_rarg1);
    call_VM_leaf(
      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
      rthread, c_rarg1);
  }
}


void InterpreterMacroAssembler::notify_method_exit(
    TosState state, NotifyMethodExitMode mode) {
  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent
  // to track stack depth.  If it is possible to enter interp_only_mode we add
  // code to check whether the event should be sent.
  if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
    Label L;
    // Note: frame::interpreter_frame_result has a dependency on how the
    // method result is saved across the call to post_method_exit. If this
    // is changed then the interpreter_frame_result implementation will
    // need to be updated too.

    push(state);
    ldr(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
    cbz(r3, L);
    call_VM(noreg,
            CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
    bind(L);
    pop(state);
  }

#ifdef DTRACE_ENABLED
  {
    SkipIfEqual skip(this, &DTraceMethodProbes, false);
    push(state);
    get_method(c_rarg1);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
                 rthread, c_rarg1);
    pop(state);
  }
#endif
}


// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
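// A rough C-like sketch of the generated code (illustrative only; `cond`
// tests the flags produced by masking the updated counter against zero):
//   int c = preloaded ? scratch : *counter_addr;
//   c += increment;
//   *counter_addr = c;
//   if ((c & *mask) <cond> 0) goto *where;   // branch only if `where` is given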
void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
                                                        int increment, Address mask,
                                                        Register scratch, Register scratch2,
                                                        bool preloaded, Condition cond,
                                                        Label* where) {
  if (!preloaded) {
    ldr(scratch, counter_addr);
  }
  add(scratch, scratch, increment);
  str(scratch, counter_addr);
  ldr(scratch2, mask);
  ands(scratch, scratch, scratch2);
  if (where) {
    b(*where, cond);
  }
}

void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
                                                  int number_of_arguments,
                                                  Label *retaddr) {
  // interpreter specific
  //
  // Note: No need to save/restore rbcp & rlocals pointer since these
  //       are callee-saved registers and no blocking/GC can happen
  //       in leaf calls.
#ifdef ASSERT
  {
    Label L;
    ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
    cbz(rscratch1, L);
    stop("InterpreterMacroAssembler::call_VM_leaf_base:"
         " last_sp != NULL");
    bind(L);
  }
#endif /* ASSERT */
  // super call
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, retaddr);
}

void InterpreterMacroAssembler::call_VM_base(Register oop_result,
                                             Register java_thread,
                                             Register last_java_sp,
                                             address  entry_point,
                                             int      number_of_arguments,
                                             bool     check_exceptions) {
  // interpreter specific
  //
  // Note: We could avoid restoring the locals pointer (callee-saved), but it
  //       doesn't really make a difference for these runtime calls, since
  //       they are slow anyway. Note that bcp must be saved/restored since
  //       it may change due to GC.
  // assert(java_thread == noreg, "not expecting a precomputed java thread");
  save_bcp();
#ifdef ASSERT
  {
    Label L;
    ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
    cbz(rscratch1, L);
    stop("InterpreterMacroAssembler::call_VM_base:"
         " last_sp != NULL");
    bind(L);
  }
#endif /* ASSERT */
  // super call
  MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
                               entry_point, number_of_arguments,
                               check_exceptions);
  // interpreter specific
  restore_bcp();
  // restore_locals();
}

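// A rough C-like sketch of the per-cell update performed below (illustrative
// only; the real code works on the raw cell bits defined by TypeEntries, and
// the racy re-check of the cell is elided here):
//   if (obj == NULL) { *mdo |= null_seen; return; }
//   k = obj->klass();
//   if (((k ^ *mdo) & type_klass_mask) == 0) return;  // same klass recorded already
//   if (*mdo & type_unknown) return;                  // already polymorphic
//   if (*mdo == 0 || *mdo == null_seen)
//     { *mdo = k | (*mdo & null_seen); return; }      // first klass seen; keep null_seen
//   *mdo |= type_unknown;                             // conflicting klass: give up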
void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
  assert_different_registers(obj, rscratch1);
  Label update, next, none;

  verify_oop(obj);

  cbnz(obj, update);
  orptr(mdo_addr, TypeEntries::null_seen);
  b(next);

  bind(update);
  load_klass(obj, obj);

  ldr(rscratch1, mdo_addr);
  eor(obj, obj, rscratch1);
  bics(rscratch1, obj, ~TypeEntries::type_klass_mask);
  b(next, Assembler::EQ); // klass seen before, nothing to
                          // do. The unknown bit may have been
                          // set already but no need to check.

  tst(obj, TypeEntries::type_unknown);
  b(next, Assembler::NE); // already unknown. Nothing to do anymore.

  ldr(rscratch1, mdo_addr);
  cbz(rscratch1, none);
  cmp(rscratch1, TypeEntries::null_seen);
  b(none, Assembler::EQ);
  // There is a chance that the checks above (re-reading profiling
  // data from memory) fail if another thread has just set the
  // profiled type to this obj's klass.
  ldr(rscratch1, mdo_addr);
  eor(obj, obj, rscratch1);
  bics(rscratch1, obj, ~TypeEntries::type_klass_mask);
  b(next, Assembler::EQ);

  // Different from before. Cannot keep an accurate profile.
  orptr(mdo_addr, TypeEntries::type_unknown);
  b(next);

  bind(none);
  // First time here. Set the profile type.
  str(obj, mdo_addr);

  bind(next);
}

void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
  if (!ProfileInterpreter) {
    return;
  }

  if (MethodData::profile_arguments() || MethodData::profile_return()) {
    Label profile_continue;

    test_method_data_pointer(mdp, profile_continue);

    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());

    ldrb(rscratch1, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start));
    cmp(rscratch1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag);
    b(profile_continue, Assembler::NE);

    if (MethodData::profile_arguments()) {
      Label done;
      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
      add(mdp, mdp, off_to_args);

      for (int i = 0; i < TypeProfileArgsLimit; i++) {
        if (i > 0 || MethodData::profile_return()) {
          // If the return value type is profiled, we may have no argument to profile.
          ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
          sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count());
          cmp(tmp, TypeStackSlotEntries::per_arg_count());
          b(done, Assembler::LT);
        }
        ldr(tmp, Address(callee, Method::const_offset()));
        load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
        // Stack offset o (zero-based) from the start of the argument
        // list, for n arguments, translates into offset n - o - 1 from
        // the end of the argument list.
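        // (Worked example: with n == 3 arguments, slot o == 0, the first
        // argument, is found at offset 3 - 0 - 1 == 2 from the end.)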
        ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
        sub(tmp, tmp, rscratch1);
        sub(tmp, tmp, 1);
        Address arg_addr = argument_address(tmp);
        ldr(tmp, arg_addr);

        Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
        profile_obj_type(tmp, mdo_arg_addr);

        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
        add(mdp, mdp, to_add);
        off_to_args += to_add;
      }

      if (MethodData::profile_return()) {
        ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
        sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
      }

      bind(done);

      if (MethodData::profile_return()) {
        // We're right after the type profile for the last
        // argument. tmp is the number of cells left in the
        // CallTypeData/VirtualCallTypeData to reach its end. Non-zero
        // if there's a return to profile.
        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
        add(mdp, mdp, tmp, lsl(exact_log2(DataLayout::cell_size)));
      }
      str(mdp, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize));
    } else {
      assert(MethodData::profile_return(), "either profile call args or call ret");
      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
    }

    // mdp points right after the end of the
    // CallTypeData/VirtualCallTypeData, right after the cells for the
    // return value type if there's one.

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
  assert_different_registers(mdp, ret, tmp, rbcp);
  if (ProfileInterpreter && MethodData::profile_return()) {
    Label profile_continue, done;

    test_method_data_pointer(mdp, profile_continue);

    if (MethodData::profile_return_jsr292_only()) {
      assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");

      // If we don't profile all invoke bytecodes we must make sure
      // it's a bytecode we indeed profile. We can't go back to the
      // beginning of the ProfileData we intend to update to check its
      // type because we're right after it and we don't know its
      // length.
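      // Roughly, the check generated below (illustrative sketch):
      //   if (*bcp == invokedynamic || *bcp == invokehandle) goto do_profile;
      //   if (method->intrinsic_id() != vmIntrinsics::_compiledLambdaForm)
      //     goto profile_continue;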
      Label do_profile;
      ldrb(rscratch1, Address(rbcp, 0));
      cmp(rscratch1, Bytecodes::_invokedynamic);
      b(do_profile, Assembler::EQ);
      cmp(rscratch1, Bytecodes::_invokehandle);
      b(do_profile, Assembler::EQ);
      get_method(tmp);
      ldrh(rscratch1, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
      mov(tmp, vmIntrinsics::_compiledLambdaForm);
      cmp(rscratch1, tmp);
      b(profile_continue, Assembler::NE);

      bind(do_profile);
    }

    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
    mov(tmp, ret);
    profile_obj_type(tmp, mdo_ret_addr);

    bind(profile_continue);
  }
}

void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
  assert_different_registers(rscratch1, rscratch2, mdp, tmp1, tmp2);
  if (ProfileInterpreter && MethodData::profile_parameters()) {
    Label profile_continue, done;

    test_method_data_pointer(mdp, profile_continue);

    // Load the offset of the area within the MDO used for
    // parameters. If it's negative, we're not profiling any parameters.
    ldr(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())));
    cmp(tmp1, 0u);
    b(profile_continue, Assembler::LT);

    // Compute a pointer to the area for parameters from the offset
    // and move the pointer to the slot for the last
    // parameter. Collect profiling from the last parameter down:
    // mdo start + parameters offset + array length - 1.
    add(mdp, mdp, tmp1);
    ldr(tmp1, Address(mdp, ArrayData::array_len_offset()));
    sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count());

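    // A rough C-like sketch of the loop generated below (illustrative only;
    // i starts at array_len - per_arg_count and indexes profiling cells):
    //   for (i = tmp1; i >= 0; i -= per_arg_count) {
    //     slot = stack_slot[i];              // slot recorded for this parameter
    //     obj  = locals[-slot];              // locals are indexed downwards
    //     profile_obj_type(obj, &type[i]);
    //   }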
    Label loop;
    bind(loop);

    int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
    int type_base = in_bytes(ParametersTypeData::type_offset(0));
    int per_arg_scale = exact_log2(DataLayout::cell_size);
    add(rscratch1, mdp, off_base);
    add(rscratch2, mdp, type_base);

    Address arg_off(rscratch1, tmp1, lsl(per_arg_scale));
    Address arg_type(rscratch2, tmp1, lsl(per_arg_scale));

    // load the stack offset from the slot for this parameter
    ldr(tmp2, arg_off);
    neg(tmp2, tmp2);
    // read the parameter from the local area
    ldr(tmp2, Address(rlocals, tmp2, lsl(Interpreter::logStackElementSize)));

    // profile the parameter
    profile_obj_type(tmp2, arg_type);

    // go to the next parameter
    subs(tmp1, tmp1, TypeStackSlotEntries::per_arg_count());
    b(loop, Assembler::GE);

    bind(profile_continue);
  }
}