/*
 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSet.inline.hpp"
#include "gc/shared/cardTableModRefBS.inline.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interp_masm_arm.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "logging/log.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
#include "oops/method.hpp"
#include "oops/methodData.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/sharedRuntime.hpp"

#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

//--------------------------------------------------------------------
// Implementation of InterpreterMacroAssembler

InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {
}

void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
#if defined(ASSERT) && !defined(AARCH64)
  // Ensure that last_sp is not filled.
  { Label L;
    ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
    cbz(Rtemp, L);
    stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL");
    bind(L);
  }
#endif // ASSERT && !AARCH64

  // Rbcp must be saved/restored since it may change due to GC.
  save_bcp();

#ifdef AARCH64
  check_no_cached_stack_top(Rtemp);
  save_stack_top();
  check_extended_sp(Rtemp);
  cut_sp_before_call();
#endif // AARCH64

  // super call
  MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions);

#ifdef AARCH64
  // Restore SP to extended SP
  restore_sp_after_call(Rtemp);
  check_stack_top();
  clear_cached_stack_top();
#endif // AARCH64

  // Restore interpreter specific registers.
  restore_bcp();
  restore_method();
}

void InterpreterMacroAssembler::jump_to_entry(address entry) {
  assert(entry, "Entry must have been generated by now");
  b(entry);
}

void InterpreterMacroAssembler::check_and_handle_popframe() {
  if (can_pop_frame()) {
    Label L;
    const Register popframe_cond = R2_tmp;

    // Initiate popframe handling only if it is not already being processed. If the flag
    // has the popframe_processing bit set, it means that this code is called *during* popframe
    // handling - we don't want to reenter.

    ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset()));
    tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L);
    tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L);

    // Call Interpreter::remove_activation_preserving_args_entry() to get the
    // address of the same-named entrypoint in the generated interpreter code.
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));

    // Call indirectly to avoid generation ordering problem.
    jump(R0);

    bind(L);
  }
}


// Blows R2, Rtemp. Sets TOS cached value.
void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
  const Register thread_state = R2_tmp;

  ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));

  const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset());
  const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset());
  const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset());
#ifndef AARCH64
  const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset()
                            + in_ByteSize(wordSize));
#endif // !AARCH64

  Register zero = zero_register(Rtemp);

  switch (state) {
    case atos: ldr(R0_tos, oop_addr);
               str(zero, oop_addr);
               interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
               break;

#ifdef AARCH64
    case ltos: ldr(R0_tos, val_addr);              break;
#else
    case ltos: ldr(R1_tos_hi, val_addr_hi);        // fall through
#endif // AARCH64
    case btos:                                     // fall through
    case ztos:                                     // fall through
    case ctos:                                     // fall through
    case stos:                                     // fall through
    case itos: ldr_s32(R0_tos, val_addr);          break;
#ifdef __SOFTFP__
    case dtos: ldr(R1_tos_hi, val_addr_hi);        // fall through
    case ftos: ldr(R0_tos, val_addr);              break;
#else
    case ftos: ldr_float(S0_tos, val_addr);        break;
    case dtos: ldr_double(D0_tos, val_addr);       break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */                 break;
    default  : ShouldNotReachHere();
  }
  // Clean up tos value in the thread object
  str(zero, val_addr);
#ifndef AARCH64
  str(zero, val_addr_hi);
#endif // !AARCH64

  mov(Rtemp, (int) ilgl);
  str_32(Rtemp, tos_addr);
}


// Blows R2, Rtemp.
void InterpreterMacroAssembler::check_and_handle_earlyret() {
  if (can_force_early_return()) {
    Label L;
    const Register thread_state = R2_tmp;

    ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));
    cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit;

    // Initiate earlyret handling only if it is not already being processed.
    // If the flag has the earlyret_processing bit set, it means that this code
    // is called *during* earlyret handling - we don't want to reenter.

    ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset()));
    cmp(Rtemp, JvmtiThreadState::earlyret_pending);
    b(L, ne);

    // Call Interpreter::remove_activation_early_entry() to get the address of the
    // same-named entrypoint in the generated interpreter code.

    ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset()));
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0);

    jump(R0);

    bind(L);
  }
}


// Sets reg. Blows Rtemp.
void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
  assert(reg != Rtemp, "should be different registers");

  ldrb(Rtemp, Address(Rbcp, bcp_offset));
  ldrb(reg, Address(Rbcp, bcp_offset+1));
  orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte));
}

void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) {
  assert_different_registers(index, tmp_reg);
  if (index_size == sizeof(u2)) {
    // load bytes of index separately to avoid unaligned access
    ldrb(index, Address(Rbcp, bcp_offset+1));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
  } else if (index_size == sizeof(u4)) {
    // TODO-AARCH64: consider using unaligned access here
    ldrb(index, Address(Rbcp, bcp_offset+3));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset+2));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset+1));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    // Check if the secondary index definition is still ~x, otherwise
    // we have to change the following assembler code to calculate the
    // plain index.
    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
    mvn_32(index, index); // convert to plain index
  } else if (index_size == sizeof(u1)) {
    ldrb(index, Address(Rbcp, bcp_offset));
  } else {
    ShouldNotReachHere();
  }
}

// Sets cache, index.
void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  assert_different_registers(cache, index);

  get_index_at_bcp(index, bcp_offset, cache, index_size);

  // load constant pool cache pointer
  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));

  // convert from field index to ConstantPoolCacheEntry index
  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
  // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called
  logical_shift_left(index, index, 2);
}
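
// A note on the byte order assumed by the index loaders above (an invariant
// sketch, not enforced in this file): raw bytecode operands are big-endian,
// so get_unsigned_2_byte_index_at_bcp() treats the byte at bcp_offset as the
// high byte, while rewritten constant pool cache indices are stored in
// native (here little-endian) byte order, so get_index_at_bcp() treats the
// byte at bcp_offset as the low byte.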

// Sets cache, index, bytecode.
void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) {
  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
  // caution: index and bytecode can be the same
  add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord));
#ifdef AARCH64
  add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
  ldarb(bytecode, bytecode);
#else
  ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
  TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true);
#endif // AARCH64
}

// Sets cache. Blows reg_tmp.
void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  assert_different_registers(cache, reg_tmp);

  get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size);

  // load constant pool cache pointer
  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));

  // skip past the header
  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  // convert from field index to ConstantPoolCacheEntry index
  // and from word offset to byte offset
  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
  add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord));
}

// Load object from cpool->resolved_references(index)
void InterpreterMacroAssembler::load_resolved_reference_at_index(
                                           Register result, Register index) {
  assert_different_registers(result, index);
  get_constant_pool(result);

  Register cache = result;
  // load pointer for resolved_references[] objArray
  ldr(cache, Address(result, ConstantPool::resolved_references_offset_in_bytes()));
  // JNIHandles::resolve(result)
  ldr(cache, Address(cache, 0));
  // Add in the index
  // convert from field index to resolved_references() index and from
  // word index to byte offset. Since this is a java object, it can be compressed
  add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop));
  load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}

// Generate a subtype check: branch to not_subtype if sub_klass is
// not a subtype of super_klass.
// Profiling code for the subtype check failure (profile_typecheck_failed)
// should be explicitly generated by the caller in the not_subtype case.
// Blows Rtemp, tmp1, tmp2.
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
                                                  Register Rsuper_klass,
                                                  Label &not_subtype,
                                                  Register tmp1,
                                                  Register tmp2) {

  assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp);
  Label ok_is_subtype, loop, update_cache;

  const Register super_check_offset = tmp1;
  const Register cached_super = tmp2;

  // Profile the not-null value's klass.
  profile_typecheck(tmp1, Rsub_klass);

  // Load the super-klass's check offset into super_check_offset
  ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset()));

  // Check for self
  cmp(Rsub_klass, Rsuper_klass);

  // Load from the sub-klass's super-class display list, or a 1-word cache of
  // the secondary superclass list, or a failing value with a sentinel offset
  // if the super-klass is an interface or exceptionally deep in the Java
  // hierarchy and we have to scan the secondary superclass list the hard way.
  // See if we get an immediate positive hit
  ldr(cached_super, Address(Rsub_klass, super_check_offset));

  cond_cmp(Rsuper_klass, cached_super, ne);
  b(ok_is_subtype, eq);

  // Check for immediate negative hit
  cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
  b(not_subtype, ne);

  // Now do a linear scan of the secondary super-klass chain.
  const Register supers_arr = tmp1;
  const Register supers_cnt = tmp2;
  const Register cur_super  = Rtemp;

  // Load objArrayOop of secondary supers.
  ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));

  ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
#ifdef AARCH64
  cbz(supers_cnt, not_subtype);
  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
#else
  cmp(supers_cnt, 0);

  // Skip to the start of array elements and prefetch the first super-klass.
  ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
  b(not_subtype, eq);
#endif // AARCH64

  bind(loop);

#ifdef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
#endif // AARCH64

  cmp(cur_super, Rsuper_klass);
  b(update_cache, eq);

  subs(supers_cnt, supers_cnt, 1);

#ifndef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
#endif // !AARCH64

  b(loop, ne);

  b(not_subtype);

  bind(update_cache);
  // Must be equal but missed in cache. Update cache.
  str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));

  bind(ok_is_subtype);
}


// The 1st part of the store check.
// Sets card_table_base register.
void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
  // Check barrier set type (should be card table) and element size
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableForRS ||
         bs->kind() == BarrierSet::CardTableExtension,
         "Wrong barrier set kind");

  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code");

  // Load card table base address.

  /* Performance note.

     There is an alternative way of loading card table base address
     from thread descriptor, which may look more efficient:

       ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));

     However, performance measurements of micro benchmarks and specJVM98
     showed that loading of card table base from thread descriptor is
     7-18% slower compared to loading of literal embedded into the code.
     Possible cause is a cache miss (card table base address resides in a
     rarely accessed area of thread descriptor).
  */
  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
  mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
}

// The 2nd part of the store check.
void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) {
  assert_different_registers(obj, card_table_base, tmp);

  assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
#ifdef AARCH64
  add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift));
  Address card_table_addr(card_table_base);
#else
  Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift);
#endif

  if (UseCondCardMark) {
    if (UseConcMarkSweepGC) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
    }
    Label already_dirty;

    ldrb(tmp, card_table_addr);
    cbz(tmp, already_dirty);

    set_card(card_table_base, card_table_addr, tmp);
    bind(already_dirty);

  } else {
    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
    }
    set_card(card_table_base, card_table_addr, tmp);
  }
}

void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
#ifdef AARCH64
  strb(ZR, card_table_addr);
#else
  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
  if ((((uintptr_t)ct->byte_map_base & 0xff) == 0)) {
    // Card table is aligned so the lowest byte of the table address base is zero.
    // This works only if the code is not saved for later use, possibly
    // in a context where the base would no longer be aligned.
    strb(card_table_base, card_table_addr);
  } else {
    mov(tmp, 0);
    strb(tmp, card_table_addr);
  }
#endif // AARCH64
}

//////////////////////////////////////////////////////////////////////////////////


// Java Expression Stack

void InterpreterMacroAssembler::pop_ptr(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, wordSize, post_indexed));
}

void InterpreterMacroAssembler::pop_i(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr_s32(r, Address(Rstack_top, wordSize, post_indexed));
  zap_high_non_significant_bits(r);
}

#ifdef AARCH64
void InterpreterMacroAssembler::pop_l(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
}
#else
void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
  assert_different_registers(lo, hi);
  assert(lo < hi, "lo must be < hi");
  pop(RegisterSet(lo) | RegisterSet(hi));
}
#endif // AARCH64

void InterpreterMacroAssembler::pop_f(FloatRegister fd) {
#ifdef AARCH64
  ldr_s(fd, Address(Rstack_top, wordSize, post_indexed));
#else
  fpops(fd);
#endif // AARCH64
}

void InterpreterMacroAssembler::pop_d(FloatRegister fd) {
#ifdef AARCH64
  ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed));
#else
  fpopd(fd);
#endif // AARCH64
}

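// A note on the pop_* accessors above and the push_* group below: the Java
// expression stack grows toward lower addresses, so pops use post-indexed
// loads and pushes use pre-indexed stores with a negative offset. On 32-bit
// ARM this corresponds to, e.g.:
//
//   pop_ptr:  ldr r, [Rstack_top], #4     // load, then step the top upward
//   push_ptr: str r, [Rstack_top, #-4]!   // step the top downward, then store
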
// Transition vtos -> state. Blows R0, R1. Sets TOS cached value.
void InterpreterMacroAssembler::pop(TosState state) {
  switch (state) {
    case atos: pop_ptr(R0_tos);                    break;
    case btos:                                     // fall through
    case ztos:                                     // fall through
    case ctos:                                     // fall through
    case stos:                                     // fall through
    case itos: pop_i(R0_tos);                      break;
#ifdef AARCH64
    case ltos: pop_l(R0_tos);                      break;
#else
    case ltos: pop_l(R0_tos_lo, R1_tos_hi);        break;
#endif // AARCH64
#ifdef __SOFTFP__
    case ftos: pop_i(R0_tos);                      break;
    case dtos: pop_l(R0_tos_lo, R1_tos_hi);        break;
#else
    case ftos: pop_f(S0_tos);                      break;
    case dtos: pop_d(D0_tos);                      break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */                 break;
    default  : ShouldNotReachHere();
  }
  interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
}

void InterpreterMacroAssembler::push_ptr(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  str(r, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
}

void InterpreterMacroAssembler::push_i(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  str_32(r, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
}

#ifdef AARCH64
void InterpreterMacroAssembler::push_l(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
  check_stack_top_on_expansion();
}
#else
void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
  assert_different_registers(lo, hi);
  assert(lo < hi, "lo must be < hi");
  push(RegisterSet(lo) | RegisterSet(hi));
}
#endif // AARCH64

void InterpreterMacroAssembler::push_f() {
#ifdef AARCH64
  str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
#else
  fpushs(S0_tos);
#endif // AARCH64
}

void InterpreterMacroAssembler::push_d() {
#ifdef AARCH64
  str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed));
  check_stack_top_on_expansion();
#else
  fpushd(D0_tos);
#endif // AARCH64
}

// Transition state -> vtos. Blows Rtemp.
void InterpreterMacroAssembler::push(TosState state) {
  interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
  switch (state) {
    case atos: push_ptr(R0_tos);                   break;
    case btos:                                     // fall through
    case ztos:                                     // fall through
    case ctos:                                     // fall through
    case stos:                                     // fall through
    case itos: push_i(R0_tos);                     break;
#ifdef AARCH64
    case ltos: push_l(R0_tos);                     break;
#else
    case ltos: push_l(R0_tos_lo, R1_tos_hi);       break;
#endif // AARCH64
#ifdef __SOFTFP__
    case ftos: push_i(R0_tos);                     break;
    case dtos: push_l(R0_tos_lo, R1_tos_hi);       break;
#else
    case ftos: push_f();                           break;
    case dtos: push_d();                           break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */                 break;
    default  : ShouldNotReachHere();
  }
}


#ifndef AARCH64

// Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) {
#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
  // According to interpreter calling conventions, result is returned in R0/R1,
  // but templates expect ftos in S0, and dtos in D0.
  if (state == ftos) {
    fmsr(S0_tos, R0);
  } else if (state == dtos) {
    fmdrr(D0_tos, R0, R1);
  }
#endif // !__SOFTFP__ && !__ABI_HARD__
}

// Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions).
void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) {
#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
  // According to interpreter calling conventions, result is returned in R0/R1,
  // so ftos (S0) and dtos (D0) are moved to R0/R1.
  if (state == ftos) {
    fmrs(R0, S0_tos);
  } else if (state == dtos) {
    fmrrd(R0, R1, D0_tos);
  }
#endif // !__SOFTFP__ && !__ABI_HARD__
}

#endif // !AARCH64


// Helpers for swap and dup
void InterpreterMacroAssembler::load_ptr(int n, Register val) {
  ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
}

void InterpreterMacroAssembler::store_ptr(int n, Register val) {
  str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
}


void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
#ifdef AARCH64
  check_no_cached_stack_top(Rtemp);
  save_stack_top();
  cut_sp_before_call();
  mov(Rparams, Rstack_top);
#endif // AARCH64

  // set sender sp
  mov(Rsender_sp, SP);

#ifndef AARCH64
  // record last_sp
  str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
#endif // !AARCH64
}

// Jump to the from_interpreted entry of a call unless single stepping is possible
// in this thread, in which case we must call the i2i entry.
void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
  assert_different_registers(method, Rtemp);

  prepare_to_jump_from_interpreted();

  if (can_post_interpreter_events()) {
    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
    // compiled code in threads for which the event is enabled. Check here for
    // interp_only_mode if these events CAN be enabled.

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
#ifdef AARCH64
    {
      Label not_interp_only_mode;

      cbz(Rtemp, not_interp_only_mode);
      indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);

      bind(not_interp_only_mode);
    }
#else
    cmp(Rtemp, 0);
    ldr(PC, Address(method, Method::interpreter_entry_offset()), ne);
#endif // AARCH64
  }

  indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp);
}


void InterpreterMacroAssembler::restore_dispatch() {
  mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos));
}


// The following two routines provide a hook so that an implementation
// can schedule the dispatch in two parts.
void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
  // Nothing ARM-specific to be done here.
}

void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
  dispatch_next(state, step);
}

void InterpreterMacroAssembler::dispatch_base(TosState state,
                                              DispatchTableMode table_mode,
                                              bool verifyoop) {
  if (VerifyActivationFrameSize) {
    Label L;
#ifdef AARCH64
    mov(Rtemp, SP);
    sub(Rtemp, FP, Rtemp);
#else
    sub(Rtemp, FP, SP);
#endif // AARCH64
    int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
    cmp(Rtemp, min_frame_size);
    b(L, ge);
    stop("broken stack frame");
    bind(L);
  }

  if (verifyoop) {
    interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
  }

  if ((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) {
    zap_high_non_significant_bits(R0_tos);
  }

#ifdef ASSERT
  Label L;
  mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos));
  cmp(Rtemp, RdispatchTable);
  b(L, eq);
  stop("invalid RdispatchTable");
  bind(L);
#endif

  if (table_mode == DispatchDefault) {
    if (state == vtos) {
      indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
    } else {
#ifdef AARCH64
      sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
                               Interpreter::distance_from_dispatch_table(state)));
      indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
#else
      // on 32-bit ARM this method is faster than the one above.
      sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
                                  Interpreter::distance_from_dispatch_table(state)) * wordSize);
      indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
#endif
    }
  } else {
    assert(table_mode == DispatchNormal, "invalid dispatch table mode");
    address table = (address) Interpreter::normal_table(state);
    mov_slow(Rtemp, table);
    indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
  }

  nop(); // to avoid filling CPU pipeline with invalid instructions
  nop();
}

void InterpreterMacroAssembler::dispatch_only(TosState state) {
  dispatch_base(state, DispatchDefault);
}


void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
  dispatch_base(state, DispatchNormal);
}

void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
  dispatch_base(state, DispatchNormal, false);
}

void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
  // load next bytecode and advance Rbcp
  ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed));
  dispatch_base(state, DispatchDefault);
}

void InterpreterMacroAssembler::narrow(Register result) {
  // mask integer result to narrower return type.
  const Register Rtmp = R2;

  // get method type
  ldr(Rtmp, Address(Rmethod, Method::const_offset()));
  ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset()));

  Label notBool, notByte, notChar, done;
  cmp(Rtmp, T_INT);
  b(done, eq);

  cmp(Rtmp, T_BOOLEAN);
  b(notBool, ne);
  and_32(result, result, 1);
  b(done);

  bind(notBool);
  cmp(Rtmp, T_BYTE);
  b(notByte, ne);
  sign_extend(result, result, 8);
  b(done);

  bind(notByte);
  cmp(Rtmp, T_CHAR);
  b(notChar, ne);
  zero_extend(result, result, 16);
  b(done);

  bind(notChar);
  // cmp(Rtmp, T_SHORT);
  // b(done, ne);
  sign_extend(result, result, 16);

  // Nothing to do
  bind(done);
}

// remove activation
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
//
// If there are locked Java monitors
//   If throw_monitor_exception
//     throws IllegalMonitorStateException
//   Else if install_monitor_exception
//     installs IllegalMonitorStateException
//   Else
//     no error processing
void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr,
                                                  bool throw_monitor_exception,
                                                  bool install_monitor_exception,
                                                  bool notify_jvmdi) {
  Label unlock, unlocked, no_unlock;

  // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result.

  const Address do_not_unlock_if_synchronized(Rthread,
                                              JavaThread::do_not_unlock_if_synchronized_offset());

  const Register Rflag = R2;
  const Register Raccess_flags = R3;

  restore_method();

  ldrb(Rflag, do_not_unlock_if_synchronized);

  // get method access flags
  ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset()));

  strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag

  // check if method is synchronized

  tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked);

  // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
  cbnz(Rflag, no_unlock);

  // unlock monitor
  push(state); // save result

  // BasicObjectLock will be first in list, since this is a synchronized method. However, need
  // to check that the object has not been unlocked by an explicit monitorexit bytecode.

  const Register Rmonitor = R1; // fixed in unlock_object()
  const Register Robj = R2;

  // address of first monitor
  sub(Rmonitor, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock));

  ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes()));
  cbnz(Robj, unlock);

  pop(state);

  if (throw_monitor_exception) {
    // Entry already unlocked, need to throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Monitor already unlocked during a stack unroll.
    // If requested, install an illegal_monitor_state_exception.
    // Continue with stack unrolling.
    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }
    b(unlocked);
  }


  // Exception case for the check that all monitors are unlocked.
  const Register Rcur = R2;
  Label restart_check_monitors_unlocked, exception_monitor_is_still_locked;

  bind(exception_monitor_is_still_locked);
  // Monitor entry is still locked, need to throw exception.
  // Rcur: monitor entry.

  if (throw_monitor_exception) {
    // Throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Stack unrolling. Unlock object and install illegal_monitor_exception.
    // Unlock does not block, so we don't have to worry about the frame.

    push(state);
    mov(R1, Rcur);
    unlock_object(R1);

    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }

    pop(state);
    b(restart_check_monitors_unlocked);
  }

  bind(unlock);
  unlock_object(Rmonitor);
  pop(state);

  // Check for block-structured locking (i.e., that all locked objects have been unlocked)
  bind(unlocked);

  // Check that all monitors are unlocked
  {
    Label loop;

    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
    const Register Rbottom = R3;
    const Register Rcur_obj = Rtemp;

    bind(restart_check_monitors_unlocked);

    ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
                                 // points to current entry, starting with top-most entry
    sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
                                 // points to word before bottom of monitor block

    cmp(Rcur, Rbottom);          // check if there are no monitors
#ifndef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                 // prefetch monitor's object
#endif // !AARCH64
    b(no_unlock, eq);

    bind(loop);
#ifdef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
#endif // AARCH64
    // check if current entry is used
    cbnz(Rcur_obj, exception_monitor_is_still_locked);

    add(Rcur, Rcur, entry_size); // otherwise advance to next entry
    cmp(Rcur, Rbottom);          // check if bottom reached
#ifndef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                 // prefetch monitor's object
#endif // !AARCH64
    b(loop, ne);                 // if not at bottom then check this entry
  }

  bind(no_unlock);

  // jvmti support
  if (notify_jvmdi) {
    notify_method_exit(state, NotifyJVMTI);     // preserve TOSCA
  } else {
    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
  }

  // remove activation
#ifdef AARCH64
  ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  ldp(FP, LR, Address(FP));
  mov(SP, Rtemp);
#else
  mov(Rtemp, FP);
  ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
  ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
#endif

  if (ret_addr != LR) {
    mov(ret_addr, LR);
  }
}


// At certain points in the method invocation the monitor of
// synchronized methods hasn't been entered yet.
// To correctly handle exceptions at these points, we set the thread local
// variable _do_not_unlock_if_synchronized to true. remove_activation()
// will check this flag.
void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) {
  const Address do_not_unlock_if_synchronized(Rthread,
                                              JavaThread::do_not_unlock_if_synchronized_offset());
  if (flag) {
    mov(tmp, 1);
    strb(tmp, do_not_unlock_if_synchronized);
  } else {
    strb(zero_register(tmp), do_not_unlock_if_synchronized);
  }
}

// Lock object
//
// Argument: R1 : Points to BasicObjectLock to be used for locking.
//           Must be initialized with object to lock.
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::lock_object(Register Rlock) {
  assert(Rlock == R1, "the second argument");

  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);
  } else {
    Label done;

    const Register Robj = R2;
    const Register Rmark = R3;
    assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);

    const int obj_offset  = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes();
    const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();

    Label already_locked, slow_case;

    // Load object pointer
    ldr(Robj, Address(Rlock, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
    }

#ifdef AARCH64
    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Robj);

    // Test if object is already locked
    assert(markOopDesc::unlocked_value == 1, "adjust this code");
    tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);

#else // AARCH64

    // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
    // That would be acceptable as either the CAS or the slow case path is taken in that case.
    // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as
    // loads are satisfied from a store queue if performed on the same processor).

    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes()));

    // Test if object is already locked
    tst(Rmark, markOopDesc::unlocked_value);
    b(already_locked, eq);

#endif // !AARCH64
    // Save old object->mark() into BasicLock's displaced header
    str(Rmark, Address(Rlock, mark_offset));

    cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case);

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr());
    }
#endif // !PRODUCT

    b(done);

    // If we got here that means the object is locked by either the calling thread or another thread.
    bind(already_locked);
    // Handling of locked objects: recursive locks and slow case.

    // Fast check for recursive lock.
    //
    // Can apply the optimization only if this is a stack lock
    // allocated in this thread. For efficiency, we can focus on
    // recently allocated stack locks (instead of reading the stack
    // base and checking whether 'mark' points inside the current
    // thread stack):
    // 1) (mark & 3) == 0
    // 2) SP <= mark < SP + os::pagesize()
    //
    // Warning: SP + os::pagesize can overflow the stack base. We must
    // neither apply the optimization for an inflated lock allocated
    // just above the thread stack (this is why condition 1 matters)
    // nor apply the optimization if the stack lock is inside the stack
    // of another thread. The latter is avoided even in case of overflow
    // because we have guard pages at the end of all stacks. Hence, if
    // we go over the stack base and hit the stack of another thread,
    // this should not be in a writeable area that could contain a
    // stack lock allocated by that thread. As a consequence, a stack
    // lock less than page size away from SP is guaranteed to be
    // owned by the current thread.
    //
    // Note: assuming SP is aligned, we can check the low bits of
    // (mark - SP) instead of the low bits of mark. In that case,
    // assuming page size is a power of 2, we can merge the two
    // conditions into a single test:
    // => ((mark - SP) & (3 - os::pagesize())) == 0

#ifdef AARCH64
    // Use the single check since the immediate is OK for AARCH64
    sub(R0, Rmark, Rstack_top);
    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
    Assembler::LogicalImmediate imm(mask, false);
    ands(R0, R0, imm);

    // For recursive case store 0 into lock record.
    // It is harmless to store it unconditionally as lock record contains some garbage
    // value in its _displaced_header field by this moment.
    str(ZR, Address(Rlock, mark_offset));

#else // AARCH64
    // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
    // Check independently the low bits and the distance to SP.
    // -1- test low 2 bits
    movs(R0, AsmOperand(Rmark, lsl, 30));
    // -2- test (mark - SP) if the low two bits are 0
    sub(R0, Rmark, SP, eq);
    movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq);
    // If still 'eq' then recursive locking OK: store 0 into lock record
    str(R0, Address(Rlock, mark_offset), eq);

#endif // AARCH64

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr());
    }
#endif // !PRODUCT

    b(done, eq);

    bind(slow_case);

    // Call the runtime routine for slow case
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);

    bind(done);
  }
}

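// A worked instance of the merged test in lock_object() above, assuming a
// 4 KB page size (the real value comes from os::vm_page_size()):
//   3 - 4096 = -4093 = 0xfffff003 on 32-bit ARM, so
//   ((mark - SP) & 0xfffff003) == 0
// holds exactly when (mark - SP) has its low two bits clear (condition 1)
// and 0 <= (mark - SP) < 4096 (condition 2).
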
// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
// Argument: R1: Points to BasicObjectLock structure for lock
// Throw an IllegalMonitorStateException if object is not locked by current thread
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::unlock_object(Register Rlock) {
  assert(Rlock == R1, "the second argument");

  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);
  } else {
    Label done, slow_case;

    const Register Robj = R2;
    const Register Rmark = R3;
    const Register Rresult = R0;
    assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);

    const int obj_offset  = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes();
    const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();

    const Register Rzero = zero_register(Rtemp);

    // Load oop into Robj
    ldr(Robj, Address(Rlock, obj_offset));

    // Free entry
    str(Rzero, Address(Rlock, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_exit(Robj, Rmark, done);
    }

    // Load the old header from BasicLock structure
    ldr(Rmark, Address(Rlock, mark_offset));

    // Test for recursion (zero mark in BasicLock)
    cbz(Rmark, done);

    bool allow_fallthrough_on_failure = true;

    cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure);

    b(done, eq);

    bind(slow_case);

    // Call the runtime routine for slow case.
    str(Robj, Address(Rlock, obj_offset)); // restore obj
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);

    bind(done);
  }
}


// Test the method data pointer. If it is null, continue at the specified label.
void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
  cbz(mdp, zero_continue);
}


// Set the method data pointer for the current bcp.
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
  assert(ProfileInterpreter, "must be profiling interpreter");
  Label set_mdp;

  // Test MDO to avoid the call if it is NULL.
  ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
  cbz(Rtemp, set_mdp);

  mov(R0, Rmethod);
  mov(R1, Rbcp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1);
  // R0/W0: mdi

  // mdo is guaranteed to be non-zero here, we checked for it before the call.
  ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
  add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()));
  add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0);

  bind(set_mdp);
  str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::verify_method_data_pointer() {
  assert(ProfileInterpreter, "must be profiling interpreter");
#ifdef ASSERT
  Label verify_continue;
  save_caller_save_registers();

  const Register Rmdp = R2;
  test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue

  // If the mdp is valid, it will point to a DataLayout header which is
  // consistent with the bcp. The converse is highly probable also.

  ldrh(R3, Address(Rmdp, DataLayout::bci_offset()));
  ldr(Rtemp, Address(Rmethod, Method::const_offset()));
  add(R3, R3, Rtemp);
  add(R3, R3, in_bytes(ConstMethod::codes_offset()));
  cmp(R3, Rbcp);
  b(verify_continue, eq);

  mov(R0, Rmethod);
  mov(R1, Rbcp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp);

  bind(verify_continue);
  restore_caller_save_registers();
#endif // ASSERT
}


void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, value);
  str(value, Address(mdp_in, offset));
}


// Increments mdp data. Sets bumped_count register to adjusted counter.
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
                                                      int offset,
                                                      Register bumped_count,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  // Counter address
  Address data(mdp_in, offset);
  assert_different_registers(mdp_in, bumped_count);

  increment_mdp_data_at(data, bumped_count, decrement);
}

void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) {
  assert_different_registers(mdp_in, Rtemp);
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range");

  // Set the flag
  ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
  orr(Rtemp, Rtemp, (unsigned)flag_byte_constant);
  strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
}


// Increments mdp data. Sets bumped_count register to adjusted counter.
void InterpreterMacroAssembler::increment_mdp_data_at(Address data,
                                                      Register bumped_count,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  ldr(bumped_count, data);
  if (decrement) {
    // Decrement the register. Set condition codes.
    subs(bumped_count, bumped_count, DataLayout::counter_increment);
    // Avoid overflow.
#ifdef AARCH64
    assert(DataLayout::counter_increment == 1, "required for cinc");
    cinc(bumped_count, bumped_count, pl);
#else
    add(bumped_count, bumped_count, DataLayout::counter_increment, pl);
#endif // AARCH64
  } else {
    // Increment the register. Set condition codes.
    adds(bumped_count, bumped_count, DataLayout::counter_increment);
    // Avoid overflow.
#ifdef AARCH64
    assert(DataLayout::counter_increment == 1, "required for cinv");
    cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
#else
    sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
#endif // AARCH64
  }
  str(bumped_count, data);
}


void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
                                                 int offset,
                                                 Register value,
                                                 Register test_value_out,
                                                 Label& not_equal_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, test_value_out, value);

  ldr(test_value_out, Address(mdp_in, offset));
  cmp(test_value_out, value);

  b(not_equal_continue, ne);
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, reg_temp);

  ldr(reg_temp, Address(mdp_in, offset_of_disp));
  add(mdp_in, mdp_in, reg_temp);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, reg_offset, reg_tmp);

  ldr(reg_tmp, Address(mdp_in, reg_offset));
  add(mdp_in, mdp_in, reg_tmp);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  add(mdp_in, mdp_in, constant);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);

  mov(R1, return_bci);
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1);
}


// Sets mdp, bumped_count registers, blows Rtemp.
void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) {
  assert_different_registers(mdp, bumped_count);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    // Otherwise, assign to mdp.
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch. Increment the taken count.
    increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp);

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are not taking a branch. Increment the not taken count.
    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp);

    // The method data pointer needs to be updated to correspond to the next bytecode.
    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_call(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_final_call(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) {
  assert_different_registers(mdp, receiver, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    Label skip_receiver_profile;
    if (receiver_can_be_null) {
      Label not_null;
      cbnz(receiver, not_null);
      // We are making a call. Increment the count for null receiver.
      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
      b(skip_receiver_profile);
      bind(not_null);
    }

    // Record the receiver type.
    record_klass_in_profile(receiver, mdp, Rtemp, true);
    bind(skip_receiver_profile);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::record_klass_in_profile_helper(
                                        Register receiver, Register mdp,
                                        Register reg_tmp,
                                        int start_row, Label& done, bool is_virtual_call) {
  if (TypeProfileWidth == 0)
    return;

  assert_different_registers(receiver, mdp, reg_tmp);

  int last_row = VirtualCallData::row_limit() - 1;
  assert(start_row <= last_row, "must be work left to do");
  // Test this row for both the receiver and for null.
  // Take any of three different outcomes:
  //   1. found receiver => increment count and goto done
  //   2. found null => keep looking for case 1, maybe allocate this cell
  //   3. found something else => keep looking for cases 1 and 2
  // Case 3 is handled by a recursive call.
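  //
  // As a sketch, for TypeProfileWidth == 2 the generated code behaves
  // roughly like this (receiver[i]/count[i] being the profile row cells,
  // inc = DataLayout::counter_increment):
  //
  //   if (recv == receiver[0])      count[0] += inc;
  //   else if (receiver[0] == NULL) {
  //     if (recv == receiver[1])    count[1] += inc;
  //     else                        { receiver[0] = recv; count[0] = inc; }
  //   } else {
  //     if (recv == receiver[1])      count[1] += inc;
  //     else if (receiver[1] == NULL) { receiver[1] = recv; count[1] = inc; }
  //     else if (is_virtual_call)     count += inc; // polymorphic: bump total count only
  //   }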
  for (int row = start_row; row <= last_row; row++) {
    Label next_test;

    // See if the receiver is receiver[n].
    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));

    test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test);

    // The receiver is receiver[n]. Increment count[n].
    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
    increment_mdp_data_at(mdp, count_offset, reg_tmp);
    b(done);

    bind(next_test);
    // reg_tmp now contains the receiver from the CallData.

    if (row == start_row) {
      Label found_null;
      // Failed the equality check on receiver[n]... Test for null.
      if (start_row == last_row) {
        // The only thing left to do is handle the null case.
        if (is_virtual_call) {
          cbz(reg_tmp, found_null);
          // Receiver did not match any saved receiver and there is no empty row for it.
          // Increment total counter to indicate polymorphic case.
          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp);
          b(done);
          bind(found_null);
        } else {
          cbnz(reg_tmp, done);
        }
        break;
      }
      // Since null is rare, make it be the branch-taken case.
      cbz(reg_tmp, found_null);

      // Put all the "Case 3" tests here.
      record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call);

      // Found a null. Keep searching for a matching receiver,
      // but remember that this is an empty (unused) slot.
      bind(found_null);
    }
  }

  // In the fall-through case, we found no matching receiver, but we
  // observed that receiver[start_row] is NULL.

  // Fill in the receiver field and increment the count.
  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
  set_mdp_data_at(mdp, recvr_offset, receiver);
  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
  mov(reg_tmp, DataLayout::counter_increment);
  set_mdp_data_at(mdp, count_offset, reg_tmp);
  if (start_row > 0) {
    b(done);
  }
}

void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
                                                        Register mdp,
                                                        Register reg_tmp,
                                                        bool is_virtual_call) {
  assert(ProfileInterpreter, "must be profiling");
  assert_different_registers(receiver, mdp, reg_tmp);

  Label done;

  record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call);

  bind(done);
}

// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
  assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);

  if (ProfileInterpreter) {
    Label profile_continue;
    uint row;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the total ret count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    for (row = 0; row < RetData::row_limit(); row++) {
      Label next_test;

      // See if return_bci is equal to bci[n]:
      test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci,
                       Rtemp, next_test);

      // return_bci is equal to bci[n]. Increment the count.
      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp);

      // The method data pointer needs to be updated to reflect the new target.
      update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp);
      b(profile_continue);
      bind(next_test);
    }

    update_mdp_for_ret(return_bci);

    bind(profile_continue);
  }
}


// Sets mdp.
void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind (profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter && TypeProfileCasts) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    int count_offset = in_bytes(CounterData::count_offset());
    // Back up the address, since we have already bumped the mdp.
    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());

    // *Decrement* the counter. We expect to see zero or small negatives.
    increment_mdp_data_at(mdp, count_offset, Rtemp, true);

    bind (profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass)
{
  assert_different_registers(mdp, klass, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

      // Record the object type.
      record_klass_in_profile(klass, mdp, Rtemp, false);
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the default case count.
    increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp);

    // The method data pointer needs to be updated.
    update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp);

    bind(profile_continue);
  }
}


// Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
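// Each case occupies one cell of the MultiBranchData case array:
//   cell = case_array_offset() + index * per_case_size()
// with the count and displacement at fixed relative offsets inside the cell;
// the shift below relies on per_case_size() being a power of two.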
void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) {
  assert_different_registers(mdp, reg_tmp1, reg_tmp2);
  assert_different_registers(mdp, reg_tmp1, index);

  if (ProfileInterpreter) {
    Label profile_continue;

    const int count_offset = in_bytes(MultiBranchData::case_array_offset()) +
                             in_bytes(MultiBranchData::relative_count_offset());

    const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) +
                                    in_bytes(MultiBranchData::relative_displacement_offset());

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Build the base (index * per_case_size()).
    logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));

    // Update the case count.
    add(reg_tmp1, reg_tmp1, count_offset);
    increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2);

    // The method data pointer needs to be updated.
    add(reg_tmp1, reg_tmp1, displacement_offset - count_offset);
    update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2);

    bind (profile_continue);
  }
}


void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
#ifdef AARCH64
  rev_w(r, r);
#else
  if (VM_Version::supports_rev()) {
    rev(r, r);
  } else {
    // No REV instruction: swap the bytes with rotated XORs, masking out
    // the bits that must not move.
    eor(rtmp1, r, AsmOperand(r, ror, 16));
    mvn(rtmp2, 0x0000ff00);
    andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
    eor(r, rtmp1, AsmOperand(r, ror, 8));
  }
#endif // AARCH64
}


void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) {
  const intx addr = (intx) (address_of_counter + offset);

  assert ((addr & 0x3) == 0, "address of counter should be aligned");
  const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));

  const address base = (address) (addr & ~offset_mask);
  const int offs = (int) (addr & offset_mask);

  const Register addr_base = tmp1;
  const Register val = tmp2;

  mov_slow(addr_base, base);
  ldr_s32(val, Address(addr_base, offs));

  if (avoid_overflow) {
    adds_32(val, val, 1);
#ifdef AARCH64
    Label L;
    b(L, mi);
    str_32(val, Address(addr_base, offs));
    bind(L);
#else
    str(val, Address(addr_base, offs), pl);
#endif // AARCH64
  } else {
    add_32(val, val, 1);
    str_32(val, Address(addr_base, offs));
  }
}

void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) {
  if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); }
}

// Inline assembly for:
//
// if (thread is in interp_only_mode) {
//   InterpreterRuntime::post_method_entry();
// }
// if (DTraceMethodProbes) {
//   SharedRuntime::dtrace_method_entry(method, receiver);
// }
// if (log_is_enabled(Trace, redefine, class, obsolete)) {
//   SharedRuntime::rc_trace_method_entry(method, receiver);
// }

void InterpreterMacroAssembler::notify_method_entry() {
  // Whenever JVMTI puts a thread in interp_only_mode, method entry/exit events
  // are sent to track stack depth. If it is possible to enter interp_only_mode
  // we add the code to check if the event should be sent.
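  // can_post_interpreter_events() is evaluated when this code is generated;
  // when it is false, no runtime check is emitted at all.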
  if (can_post_interpreter_events()) {
    Label L;

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
    cbz(Rtemp, L);

    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));

    bind(L);
  }

  // Note: the runtime DTrace check below is only emitted when DTraceMethodProbes
  // is set at code-generation time, to eliminate overhead on each method entry.
  if (DTraceMethodProbes) {
    Label Lcontinue;

    ldrb_global(Rtemp, (address)&DTraceMethodProbes);
    cbz(Rtemp, Lcontinue);

    mov(R0, Rthread);
    mov(R1, Rmethod);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1);

    bind(Lcontinue);
  }

  // RedefineClasses() tracing support for obsolete method entry.
  if (log_is_enabled(Trace, redefine, class, obsolete)) {
    mov(R0, Rthread);
    mov(R1, Rmethod);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
                 R0, R1);
  }
}


void InterpreterMacroAssembler::notify_method_exit(
                 TosState state, NotifyMethodExitMode mode,
                 bool native, Register result_lo, Register result_hi, FloatRegister result_fp) {
  // Whenever JVMTI puts a thread in interp_only_mode, method entry/exit events
  // are sent to track stack depth. If it is possible to enter interp_only_mode
  // we add the code to check if the event should be sent.
  if (mode == NotifyJVMTI && can_post_interpreter_events()) {
    Label L;
    // Note: frame::interpreter_frame_result has a dependency on how the
    // method result is saved across the call to post_method_exit. If this
    // is changed then the interpreter_frame_result implementation will
    // need to be updated too.

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
    cbz(Rtemp, L);

    if (native) {
      // For the C++ and template interpreters: in native methods the TOS state
      // is unknown, so push both result registers on the stack.
      // On AArch64 the result registers are stored into the frame at known locations.
      // See frame::interpreter_frame_result for code that gets the result values from here.
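      // The method result is still live in result_lo/result_hi/result_fp and
      // must survive the call to post_method_exit below, so it is saved here
      // and restored once the VM call returns.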
      assert(result_lo != noreg, "result registers should be defined");

#ifdef AARCH64
      assert(result_hi == noreg, "result_hi is not used on AArch64");
      assert(result_fp != fnoreg, "FP result register must be defined");

      str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      assert(result_hi != noreg, "result registers should be defined");

#ifdef __ABI_HARD__
      assert(result_fp != fnoreg, "FP result register must be defined");
      sub(SP, SP, 2 * wordSize);
      fstd(result_fp, Address(SP));
#endif // __ABI_HARD__

      push(RegisterSet(result_lo) | RegisterSet(result_hi));
#endif // AARCH64

      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));

#ifdef AARCH64
      ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      pop(RegisterSet(result_lo) | RegisterSet(result_hi));
#ifdef __ABI_HARD__
      fldd(result_fp, Address(SP));
      add(SP, SP, 2 * wordSize);
#endif // __ABI_HARD__
#endif // AARCH64

    } else {
      // For the template interpreter, the value on tos is the size of the
      // state. (The C++ interpreter calls JVMTI somewhere else.)
      push(state);
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
      pop(state);
    }

    bind(L);
  }

  // Note: the runtime DTrace check below is only emitted when DTraceMethodProbes
  // is set at code-generation time, to eliminate overhead on each method exit.
  if (DTraceMethodProbes) {
    Label Lcontinue;

    ldrb_global(Rtemp, (address)&DTraceMethodProbes);
    cbz(Rtemp, Lcontinue);

    push(state);

    mov(R0, Rthread);
    mov(R1, Rmethod);

    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1);

    pop(state);

    bind(Lcontinue);
  }
}


#ifndef PRODUCT

void InterpreterMacroAssembler::trace_state(const char* msg) {
  int push_size = save_caller_save_registers();

  Label Lcontinue;
  InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n");
  InlinedString Lmsg(msg);
  InlinedAddress Lprintf((address)printf);

  ldr_literal(R0, Lmsg0);
  ldr_literal(R1, Lmsg);
  mov(R2, FP);
  add(R3, SP, push_size); // original SP (without saved registers)
  ldr_literal(Rtemp, Lprintf);
  call(Rtemp);

  b(Lcontinue);

  bind_literal(Lmsg0);
  bind_literal(Lmsg);
  bind_literal(Lprintf);

  bind(Lcontinue);

  restore_caller_save_registers();
}

#endif

// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
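// Typically used for the invocation and backedge counter overflow checks;
// the mask (loaded from mask_addr) selects how often the overflow path is
// actually taken.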
void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
                                                        int increment, Address mask_addr,
                                                        Register scratch, Register scratch2,
                                                        AsmCondition cond, Label* where) {
  // Caution: scratch2 and the base address of counter_addr can be the same.
  assert_different_registers(scratch, scratch2);
  ldr_u32(scratch, counter_addr);
  add(scratch, scratch, increment);
  str_32(scratch, counter_addr);

#ifdef AARCH64
  ldr_u32(scratch2, mask_addr);
  ands_w(ZR, scratch, scratch2);
#else
  ldr(scratch2, mask_addr);
  andrs(scratch, scratch, scratch2);
#endif // AARCH64
  b(*where, cond);
}

void InterpreterMacroAssembler::get_method_counters(Register method,
                                                    Register Rcounters,
                                                    Label& skip) {
  const Address method_counters(method, Method::method_counters_offset());
  Label has_counters;

  ldr(Rcounters, method_counters);
  cbnz(Rcounters, has_counters);

#ifdef AARCH64
  const Register tmp = Rcounters;
  const int saved_regs_size = 20*wordSize;

  // Note: call_VM will cut SP according to Rstack_top value before call, and restore SP to
  // extended_sp value from frame after the call.
  // So make sure there is enough stack space to save registers and adjust Rstack_top accordingly.
  {
    Label enough_stack_space;
    check_extended_sp(tmp);
    sub(Rstack_top, Rstack_top, saved_regs_size);
    cmp(SP, Rstack_top);
    b(enough_stack_space, ls);

    align_reg(tmp, Rstack_top, StackAlignmentInBytes);
    mov(SP, tmp);
    str(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));

    bind(enough_stack_space);
    check_stack_top();

    int offset = 0;
    stp(R0,  R1,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R2,  R3,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R4,  R5,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R6,  R7,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R8,  R9,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R18, LR,  Address(Rstack_top, offset)); offset += 2*wordSize;
    assert (offset == saved_regs_size, "should be");
  }
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
#endif // AARCH64

  mov(R1, method);
  call_VM(noreg, CAST_FROM_FN_PTR(address,
          InterpreterRuntime::build_method_counters), R1);

#ifdef AARCH64
  {
    int offset = 0;
    ldp(R0,  R1,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R2,  R3,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R4,  R5,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R6,  R7,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R8,  R9,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R18, LR,  Address(Rstack_top, offset)); offset += 2*wordSize;
    assert (offset == saved_regs_size, "should be");

    // All registers restored; release the save area.
    add(Rstack_top, Rstack_top, saved_regs_size);
  }
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
#endif // AARCH64

  ldr(Rcounters, method_counters);
  cbz(Rcounters, skip); // No MethodCounters created, OutOfMemory

  bind(has_counters);
}