/*
 * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSet.inline.hpp"
#include "gc/shared/cardTableModRefBS.inline.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interp_masm_arm.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "logging/log.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
#include "oops/method.hpp"
#include "oops/methodData.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/sharedRuntime.hpp"

#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

//--------------------------------------------------------------------
// Implementation of InterpreterMacroAssembler

InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {
}

void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
#if defined(ASSERT) && !defined(AARCH64)
  // Ensure that last_sp is not filled.
  { Label L;
    ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
    cbz(Rtemp, L);
    stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL");
    bind(L);
  }
#endif // ASSERT && !AARCH64

  // Rbcp must be saved/restored since it may change due to GC.
  save_bcp();

#ifdef AARCH64
  check_no_cached_stack_top(Rtemp);
  save_stack_top();
  check_extended_sp(Rtemp);
  cut_sp_before_call();
#endif // AARCH64

  // super call
  MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions);

#ifdef AARCH64
  // Restore SP to extended SP
  restore_sp_after_call(Rtemp);
  check_stack_top();
  clear_cached_stack_top();
#endif // AARCH64

  // Restore interpreter specific registers.
  restore_bcp();
  restore_method();
}

void InterpreterMacroAssembler::jump_to_entry(address entry) {
  assert(entry, "Entry must have been generated by now");
  b(entry);
}

void InterpreterMacroAssembler::check_and_handle_popframe() {
  if (can_pop_frame()) {
    Label L;
    const Register popframe_cond = R2_tmp;

    // Initiate popframe handling only if it is not already being processed. If the flag
    // has the popframe_processing bit set, it means that this code is called *during* popframe
    // handling - we don't want to reenter.

    ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset()));
    tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L);
    tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L);

    // Call Interpreter::remove_activation_preserving_args_entry() to get the
    // address of the same-named entrypoint in the generated interpreter code.
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));

    // Call indirectly to avoid generation ordering problem.
    jump(R0);

    bind(L);
  }
}


// Blows R2, Rtemp. Sets TOS cached value.
void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
  const Register thread_state = R2_tmp;

  ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));

  const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset());
  const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset());
  const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset());
#ifndef AARCH64
  const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset()
                            + in_ByteSize(wordSize));
#endif // !AARCH64

  Register zero = zero_register(Rtemp);

  switch (state) {
    case atos: ldr(R0_tos, oop_addr);
               str(zero, oop_addr);
               interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
               break;

#ifdef AARCH64
    case ltos: ldr(R0_tos, val_addr);           break;
#else
    case ltos: ldr(R1_tos_hi, val_addr_hi);     // fall through
#endif // AARCH64
    case btos:                                  // fall through
    case ztos:                                  // fall through
    case ctos:                                  // fall through
    case stos:                                  // fall through
    case itos: ldr_s32(R0_tos, val_addr);       break;
#ifdef __SOFTFP__
    case dtos: ldr(R1_tos_hi, val_addr_hi);     // fall through
    case ftos: ldr(R0_tos, val_addr);           break;
#else
    case ftos: ldr_float (S0_tos, val_addr);    break;
    case dtos: ldr_double(D0_tos, val_addr);    break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */              break;
    default  : ShouldNotReachHere();
  }
  // Clean up tos value in the thread object
  str(zero, val_addr);
#ifndef AARCH64
  str(zero, val_addr_hi);
#endif // !AARCH64

  mov(Rtemp, (int) ilgl);
  str_32(Rtemp, tos_addr);
}


// Blows R2, Rtemp.
void InterpreterMacroAssembler::check_and_handle_earlyret() {
  if (can_force_early_return()) {
    Label L;
    const Register thread_state = R2_tmp;

    ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset()));
    cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit;

    // Initiate earlyret handling only if it is not already being processed.
    // If the flag has the earlyret_processing bit set, it means that this code
    // is called *during* earlyret handling - we don't want to reenter.

    ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset()));
    cmp(Rtemp, JvmtiThreadState::earlyret_pending);
    b(L, ne);

    // Call Interpreter::remove_activation_early_entry() to get the address of the
    // same-named entrypoint in the generated interpreter code.

    ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset()));
    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0);

    jump(R0);

    bind(L);
  }
}


// Sets reg. Blows Rtemp.
void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
  assert(reg != Rtemp, "should be different registers");

  ldrb(Rtemp, Address(Rbcp, bcp_offset));
  ldrb(reg, Address(Rbcp, bcp_offset+1));
  orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte));
}

void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) {
  assert_different_registers(index, tmp_reg);
  if (index_size == sizeof(u2)) {
    // load bytes of index separately to avoid unaligned access
    ldrb(index, Address(Rbcp, bcp_offset+1));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
  } else if (index_size == sizeof(u4)) {
    // TODO-AARCH64: consider using unaligned access here
    ldrb(index, Address(Rbcp, bcp_offset+3));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset+2));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset+1));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    ldrb(tmp_reg, Address(Rbcp, bcp_offset));
    orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
    // Check if the secondary index definition is still ~x, otherwise
    // we have to change the following assembler code to calculate the
    // plain index.
    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
    mvn_32(index, index); // convert to plain index
  } else if (index_size == sizeof(u1)) {
    ldrb(index, Address(Rbcp, bcp_offset));
  } else {
    ShouldNotReachHere();
  }
}

// Sets cache, index.
void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  assert_different_registers(cache, index);

  get_index_at_bcp(index, bcp_offset, cache, index_size);

  // load constant pool cache pointer
  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));

  // convert from field index to ConstantPoolCacheEntry index
  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
  // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called
  logical_shift_left(index, index, 2);
}

// Sets cache, index, bytecode.
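// Note: the resolution bytecode is loaded with acquire semantics (ldarb on
// AArch64, ldrb followed by a LoadLoad barrier on 32-bit ARM), so that once
// a bytecode is observed as resolved, subsequent loads of the resolved
// ConstantPoolCacheEntry fields are ordered after that observation.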
void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) {
  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
  // caution: index and bytecode can be the same register
  add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord));
#ifdef AARCH64
  add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
  ldarb(bytecode, bytecode);
#else
  ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
  TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true);
#endif // AARCH64
}

// Sets cache. Blows reg_tmp.
void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  assert_different_registers(cache, reg_tmp);

  get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size);

  // load constant pool cache pointer
  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));

  // skip past the header
  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
  // convert from field index to ConstantPoolCacheEntry index
  // and from word offset to byte offset
  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
  add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord));
}

// Load object from cpool->resolved_references(index)
void InterpreterMacroAssembler::load_resolved_reference_at_index(
                                           Register result, Register index) {
  assert_different_registers(result, index);
  get_constant_pool(result);

  Register cache = result;
  // load pointer for resolved_references[] objArray
  ldr(cache, Address(result, ConstantPool::resolved_references_offset_in_bytes()));
  // JNIHandles::resolve(result)
  ldr(cache, Address(cache, 0));
  // Add in the index
  // convert from field index to resolved_references() index and from
  // word index to byte offset. Since this is a java object, it can be compressed
  add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop));
  load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}

// Generate a subtype check: branch to not_subtype if sub_klass is
// not a subtype of super_klass.
// Profiling code for the subtype check failure (profile_typecheck_failed)
// should be explicitly generated by the caller in the not_subtype case.
// Blows Rtemp, tmp1, tmp2.
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
                                                  Register Rsuper_klass,
                                                  Label &not_subtype,
                                                  Register tmp1,
                                                  Register tmp2) {

  assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp);
  Label ok_is_subtype, loop, update_cache;

  const Register super_check_offset = tmp1;
  const Register cached_super = tmp2;

  // Profile the not-null value's klass.
  profile_typecheck(tmp1, Rsub_klass);

  // Load the super-klass's check offset into super_check_offset
  ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset()));

  // Check for self
  cmp(Rsub_klass, Rsuper_klass);

  // Load from the sub-klass's super-class display list, or a 1-word cache of
  // the secondary superclass list, or a failing value with a sentinel offset
  // if the super-klass is an interface or exceptionally deep in the Java
  // hierarchy and we have to scan the secondary superclass list the hard way.
  // See if we get an immediate positive hit
  ldr(cached_super, Address(Rsub_klass, super_check_offset));

  cond_cmp(Rsuper_klass, cached_super, ne);
  b(ok_is_subtype, eq);

  // Check for immediate negative hit
  cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
  b(not_subtype, ne);

  // Now do a linear scan of the secondary super-klass chain.
  const Register supers_arr = tmp1;
  const Register supers_cnt = tmp2;
  const Register cur_super  = Rtemp;

  // Load objArrayOop of secondary supers.
  ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));

  ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
#ifdef AARCH64
  cbz(supers_cnt, not_subtype);
  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
#else
  cmp(supers_cnt, 0);

  // Skip to the start of array elements and prefetch the first super-klass.
  ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
  b(not_subtype, eq);
#endif // AARCH64

  bind(loop);

#ifdef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
#endif // AARCH64

  cmp(cur_super, Rsuper_klass);
  b(update_cache, eq);

  subs(supers_cnt, supers_cnt, 1);

#ifndef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
#endif // !AARCH64

  b(loop, ne);

  b(not_subtype);

  bind(update_cache);
  // Must be equal but missed in cache. Update cache.
  str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));

  bind(ok_is_subtype);
}


// The 1st part of the store check.
// Sets card_table_base register.
void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
  // Check barrier set type (should be card table) and element size
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableForRS ||
         bs->kind() == BarrierSet::CardTableExtension,
         "Wrong barrier set kind");

  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code");

  // Load card table base address.

  /* Performance note.

     There is an alternative way of loading card table base address
     from thread descriptor, which may look more efficient:

     ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));

     However, performance measurements of micro benchmarks and specJVM98
     showed that loading of card table base from thread descriptor is
     7-18% slower compared to loading of literal embedded into the code.
     Possible cause is a cache miss (card table base address resides in a
     rarely accessed area of thread descriptor).
   */
  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
  mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
}

// The 2nd part of the store check.
void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) {
  assert_different_registers(obj, card_table_base, tmp);

  assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
#ifdef AARCH64
  add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift));
  Address card_table_addr(card_table_base);
#else
  Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift);
#endif

  if (UseCondCardMark) {
    if (UseConcMarkSweepGC) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
    }
    Label already_dirty;

    ldrb(tmp, card_table_addr);
    cbz(tmp, already_dirty);

    set_card(card_table_base, card_table_addr, tmp);
    bind(already_dirty);

  } else {
    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
    }
    set_card(card_table_base, card_table_addr, tmp);
  }
}

void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
#ifdef AARCH64
  strb(ZR, card_table_addr);
#else
  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
  if ((((uintptr_t)ct->byte_map_base & 0xff) == 0)) {
    // Card table is aligned so the lowest byte of the table address base is zero.
    // This works only if the code is not saved for later use, possibly
    // in a context where the base would no longer be aligned.
    strb(card_table_base, card_table_addr);
  } else {
    mov(tmp, 0);
    strb(tmp, card_table_addr);
  }
#endif // AARCH64
}

//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
void InterpreterMacroAssembler::g1_write_barrier_pre(Register store_addr,
                                                     Register new_val,
                                                     Register pre_val,
                                                     Register tmp1,
                                                     Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert (new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        SATBMarkQueue::byte_offset_of_active()));
  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  SATBMarkQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                   SATBMarkQueue::byte_offset_of_buf()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  ldrb(tmp1, in_progress);
  cbz(tmp1, done);

  // Do we need to load the previous value?
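  // (SATB pre-barrier: the to-be-overwritten value is recorded so that
  // concurrent marking still traces the snapshot-at-the-beginning graph.)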
  if (store_addr != noreg) {
    load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ldr(tmp1, index);           // tmp1 := *index_adr
  ldr(tmp2, buffer);

  subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  b(runtime, lt);             // If negative, goto runtime

  str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  str(pre_val, Address(tmp2, tmp1));
  b(done);

  bind(runtime);

  // save the live input values
#ifdef AARCH64
  if (store_addr != noreg) {
    raw_push(store_addr, new_val);
  } else {
    raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    mov(R0, pre_val);
  }
  mov(R1, Rthread);

  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    raw_pop(store_addr, new_val);
  } else {
    raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    pop(pre_val);
  }
#endif // AARCH64

  bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void InterpreterMacroAssembler::g1_write_barrier_post(Register store_addr,
                                                      Register new_val,
                                                      Register tmp1,
                                                      Register tmp2,
                                                      Register tmp3) {

  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        DirtyCardQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                   DirtyCardQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  cbz(tmp1, done);
#else
  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  b(done, eq);
#endif

  // crosses regions, storing NULL?

  cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift));

  ldrb(tmp2, Address(card_addr));
  cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
  b(done, eq);

  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
  ldrb(tmp2, Address(card_addr));
  cbz(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.
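  // (dirty_card_val() is asserted to be 0 above, so storing the zero
  // register below is what marks the card dirty before it is enqueued.)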

  strb(zero_register(tmp2), Address(card_addr));

  ldr(tmp2, queue_index);
  ldr(tmp3, buffer);

  subs(tmp2, tmp2, wordSize);
  b(runtime, lt); // go to runtime if now negative

  str(tmp2, queue_index);

  str(card_addr, Address(tmp3, tmp2));
  b(done);

  bind(runtime);

  if (card_addr != R0) {
    mov(R0, card_addr);
  }
  mov(R1, Rthread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  bind(done);
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////


// Java Expression Stack

void InterpreterMacroAssembler::pop_ptr(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, wordSize, post_indexed));
}

void InterpreterMacroAssembler::pop_i(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr_s32(r, Address(Rstack_top, wordSize, post_indexed));
  zap_high_non_significant_bits(r);
}

#ifdef AARCH64
void InterpreterMacroAssembler::pop_l(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
}
#else
void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
  assert_different_registers(lo, hi);
  assert(lo < hi, "lo must be < hi");
  pop(RegisterSet(lo) | RegisterSet(hi));
}
#endif // AARCH64

void InterpreterMacroAssembler::pop_f(FloatRegister fd) {
#ifdef AARCH64
  ldr_s(fd, Address(Rstack_top, wordSize, post_indexed));
#else
  fpops(fd);
#endif // AARCH64
}

void InterpreterMacroAssembler::pop_d(FloatRegister fd) {
#ifdef AARCH64
  ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed));
#else
  fpopd(fd);
#endif // AARCH64
}


// Transition vtos -> state. Blows R0, R1. Sets TOS cached value.
void InterpreterMacroAssembler::pop(TosState state) {
  switch (state) {
    case atos: pop_ptr(R0_tos);                 break;
    case btos:                                  // fall through
    case ztos:                                  // fall through
    case ctos:                                  // fall through
    case stos:                                  // fall through
    case itos: pop_i(R0_tos);                   break;
#ifdef AARCH64
    case ltos: pop_l(R0_tos);                   break;
#else
    case ltos: pop_l(R0_tos_lo, R1_tos_hi);     break;
#endif // AARCH64
#ifdef __SOFTFP__
    case ftos: pop_i(R0_tos);                   break;
    case dtos: pop_l(R0_tos_lo, R1_tos_hi);     break;
#else
    case ftos: pop_f(S0_tos);                   break;
    case dtos: pop_d(D0_tos);                   break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */              break;
    default  : ShouldNotReachHere();
  }
  interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
}

void InterpreterMacroAssembler::push_ptr(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  str(r, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
}

void InterpreterMacroAssembler::push_i(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  str_32(r, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
}

#ifdef AARCH64
void InterpreterMacroAssembler::push_l(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
  check_stack_top_on_expansion();
}
#else
void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
  assert_different_registers(lo, hi);
  assert(lo < hi, "lo must be < hi");
  push(RegisterSet(lo) | RegisterSet(hi));
}
#endif // AARCH64

void InterpreterMacroAssembler::push_f() {
#ifdef AARCH64
  str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed));
  check_stack_top_on_expansion();
#else
  fpushs(S0_tos);
#endif // AARCH64
}

void InterpreterMacroAssembler::push_d() {
#ifdef AARCH64
  str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed));
  check_stack_top_on_expansion();
#else
  fpushd(D0_tos);
#endif // AARCH64
}

// Transition state -> vtos. Blows Rtemp.
void InterpreterMacroAssembler::push(TosState state) {
  interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
  switch (state) {
    case atos: push_ptr(R0_tos);                break;
    case btos:                                  // fall through
    case ztos:                                  // fall through
    case ctos:                                  // fall through
    case stos:                                  // fall through
    case itos: push_i(R0_tos);                  break;
#ifdef AARCH64
    case ltos: push_l(R0_tos);                  break;
#else
    case ltos: push_l(R0_tos_lo, R1_tos_hi);    break;
#endif // AARCH64
#ifdef __SOFTFP__
    case ftos: push_i(R0_tos);                  break;
    case dtos: push_l(R0_tos_lo, R1_tos_hi);    break;
#else
    case ftos: push_f();                        break;
    case dtos: push_d();                        break;
#endif // __SOFTFP__
    case vtos: /* nothing to do */              break;
    default  : ShouldNotReachHere();
  }
}


#ifndef AARCH64

// Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) {
#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
  // According to interpreter calling conventions, result is returned in R0/R1,
  // but templates expect ftos in S0, and dtos in D0.
  if (state == ftos) {
    fmsr(S0_tos, R0);
  } else if (state == dtos) {
    fmdrr(D0_tos, R0, R1);
  }
#endif // !__SOFTFP__ && !__ABI_HARD__
}

// Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions).
void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) {
#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
  // According to interpreter calling conventions, result is returned in R0/R1,
  // so ftos (S0) and dtos (D0) are moved to R0/R1.
  if (state == ftos) {
    fmrs(R0, S0_tos);
  } else if (state == dtos) {
    fmrrd(R0, R1, D0_tos);
  }
#endif // !__SOFTFP__ && !__ABI_HARD__
}

#endif // !AARCH64


// Helpers for swap and dup
void InterpreterMacroAssembler::load_ptr(int n, Register val) {
  ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
}

void InterpreterMacroAssembler::store_ptr(int n, Register val) {
  str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n)));
}


void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
#ifdef AARCH64
  check_no_cached_stack_top(Rtemp);
  save_stack_top();
  cut_sp_before_call();
  mov(Rparams, Rstack_top);
#endif // AARCH64

  // set sender sp
  mov(Rsender_sp, SP);

#ifndef AARCH64
  // record last_sp
  str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
#endif // !AARCH64
}

// Jump to from_interpreted entry of a call unless single stepping is possible
// in this thread, in which case we must call the i2i entry.
void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
  assert_different_registers(method, Rtemp);

  prepare_to_jump_from_interpreted();

  if (can_post_interpreter_events()) {
    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
    // compiled code in threads for which the event is enabled. Check here for
    // interp_only_mode if these events CAN be enabled.

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
#ifdef AARCH64
    {
      Label not_interp_only_mode;

      cbz(Rtemp, not_interp_only_mode);
      indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);

      bind(not_interp_only_mode);
    }
#else
    cmp(Rtemp, 0);
    ldr(PC, Address(method, Method::interpreter_entry_offset()), ne);
#endif // AARCH64
  }

  indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp);
}


void InterpreterMacroAssembler::restore_dispatch() {
  mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos));
}


// The following two routines provide a hook so that an implementation
// can schedule the dispatch in two parts.
void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
  // Nothing ARM-specific to be done here.
}

void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
  dispatch_next(state, step);
}

void InterpreterMacroAssembler::dispatch_base(TosState state,
                                              DispatchTableMode table_mode,
                                              bool verifyoop) {
  if (VerifyActivationFrameSize) {
    Label L;
#ifdef AARCH64
    mov(Rtemp, SP);
    sub(Rtemp, FP, Rtemp);
#else
    sub(Rtemp, FP, SP);
#endif // AARCH64
    int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
    cmp(Rtemp, min_frame_size);
    b(L, ge);
    stop("broken stack frame");
    bind(L);
  }

  if (verifyoop) {
    interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
  }

  if ((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) {
    zap_high_non_significant_bits(R0_tos);
  }

#ifdef ASSERT
  Label L;
  mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos));
  cmp(Rtemp, RdispatchTable);
  b(L, eq);
  stop("invalid RdispatchTable");
  bind(L);
#endif

  if (table_mode == DispatchDefault) {
    if (state == vtos) {
      indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
    } else {
#ifdef AARCH64
      sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
                               Interpreter::distance_from_dispatch_table(state)));
      indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
#else
      // On 32-bit ARM this method is faster than the one above.
      sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
                                  Interpreter::distance_from_dispatch_table(state)) * wordSize);
      indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
#endif
    }
  } else {
    assert(table_mode == DispatchNormal, "invalid dispatch table mode");
    address table = (address) Interpreter::normal_table(state);
    mov_slow(Rtemp, table);
    indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
  }

  nop(); // to avoid filling CPU pipeline with invalid instructions
  nop();
}

void InterpreterMacroAssembler::dispatch_only(TosState state) {
  dispatch_base(state, DispatchDefault);
}


void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
  dispatch_base(state, DispatchNormal);
}

void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
  dispatch_base(state, DispatchNormal, false);
}

void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
  // load next bytecode and advance Rbcp
  ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed));
  dispatch_base(state, DispatchDefault);
}

void InterpreterMacroAssembler::narrow(Register result) {
  // mask integer result to narrower return type
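  // (The interpreter computes subword values in a full 32-bit register;
  // boolean/byte/char/short results must be truncated or sign-extended
  // to their declared type before being returned.)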
  const Register Rtmp = R2;

  // get method type
  ldr(Rtmp, Address(Rmethod, Method::const_offset()));
  ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset()));

  Label notBool, notByte, notChar, done;
  cmp(Rtmp, T_INT);
  b(done, eq);

  cmp(Rtmp, T_BOOLEAN);
  b(notBool, ne);
  and_32(result, result, 1);
  b(done);

  bind(notBool);
  cmp(Rtmp, T_BYTE);
  b(notByte, ne);
  sign_extend(result, result, 8);
  b(done);

  bind(notByte);
  cmp(Rtmp, T_CHAR);
  b(notChar, ne);
  zero_extend(result, result, 16);
  b(done);

  bind(notChar);
  // cmp(Rtmp, T_SHORT);
  // b(done, ne);
  sign_extend(result, result, 16);

  // Nothing to do
  bind(done);
}

// remove activation
//
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
//
// If there are locked Java monitors
//    If throw_monitor_exception
//       throws IllegalMonitorStateException
//    Else if install_monitor_exception
//       installs IllegalMonitorStateException
//    Else
//       no error processing
void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr,
                                                  bool throw_monitor_exception,
                                                  bool install_monitor_exception,
                                                  bool notify_jvmdi) {
  Label unlock, unlocked, no_unlock;

  // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result.

  const Address do_not_unlock_if_synchronized(Rthread,
                                              JavaThread::do_not_unlock_if_synchronized_offset());

  const Register Rflag = R2;
  const Register Raccess_flags = R3;

  restore_method();

  ldrb(Rflag, do_not_unlock_if_synchronized);

  // get method access flags
  ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset()));

  strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag

  // check if method is synchronized

  tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked);

  // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set.
  cbnz(Rflag, no_unlock);

  // unlock monitor
  push(state); // save result

  // BasicObjectLock will be first in list, since this is a synchronized method. However, need
  // to check that the object has not been unlocked by an explicit monitorexit bytecode.

  const Register Rmonitor = R1; // fixed in unlock_object()
  const Register Robj = R2;

  // address of first monitor
  sub(Rmonitor, FP, - frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock));

  ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes()));
  cbnz(Robj, unlock);

  pop(state);

  if (throw_monitor_exception) {
    // Entry already unlocked, need to throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Monitor already unlocked during a stack unroll.
    // If requested, install an illegal_monitor_state_exception.
    // Continue with stack unrolling.
    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }
    b(unlocked);
  }


  // Exception case for the check that all monitors are unlocked.
  const Register Rcur = R2;
  Label restart_check_monitors_unlocked, exception_monitor_is_still_locked;

  bind(exception_monitor_is_still_locked);
  // Monitor entry is still locked, need to throw exception.
  // Rcur: monitor entry.

  if (throw_monitor_exception) {
    // Throw exception
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception));
    should_not_reach_here();
  } else {
    // Stack unrolling. Unlock object and install illegal_monitor_exception.
    // Unlock does not block, so we don't have to worry about the frame.

    push(state);
    mov(R1, Rcur);
    unlock_object(R1);

    if (install_monitor_exception) {
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
    }

    pop(state);
    b(restart_check_monitors_unlocked);
  }

  bind(unlock);
  unlock_object(Rmonitor);
  pop(state);

  // Check for block-structured locking (i.e., that all locked objects have been unlocked)
  bind(unlocked);

  // Check that all monitors are unlocked
  {
    Label loop;

    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
    const Register Rbottom = R3;
    const Register Rcur_obj = Rtemp;

    bind(restart_check_monitors_unlocked);

    ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize));
                                 // points to current entry, starting with top-most entry
    sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize);
                                 // points to word before bottom of monitor block

    cmp(Rcur, Rbottom);          // check if there are no monitors
#ifndef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                 // prefetch monitor's object
#endif // !AARCH64
    b(no_unlock, eq);

    bind(loop);
#ifdef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
#endif // AARCH64
    // check if current entry is used
    cbnz(Rcur_obj, exception_monitor_is_still_locked);

    add(Rcur, Rcur, entry_size); // otherwise advance to next entry
    cmp(Rcur, Rbottom);          // check if bottom reached
#ifndef AARCH64
    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                 // prefetch monitor's object
#endif // !AARCH64
    b(loop, ne);                 // if not at bottom then check this entry
  }

  bind(no_unlock);

  // jvmti support
  if (notify_jvmdi) {
    notify_method_exit(state, NotifyJVMTI);     // preserve TOSCA
  } else {
    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
  }

  // remove activation
#ifdef AARCH64
  ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
  ldp(FP, LR, Address(FP));
  mov(SP, Rtemp);
#else
  mov(Rtemp, FP);
  ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
  ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
#endif

  if (ret_addr != LR) {
    mov(ret_addr, LR);
  }
}


// At certain points in the method invocation the monitor of
// synchronized methods hasn't been entered yet.
// To correctly handle exceptions at these points, we set the thread local
// variable _do_not_unlock_if_synchronized to true. The remove_activation will
// check this flag.
void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) {
  const Address do_not_unlock_if_synchronized(Rthread,
                                              JavaThread::do_not_unlock_if_synchronized_offset());
  if (flag) {
    mov(tmp, 1);
    strb(tmp, do_not_unlock_if_synchronized);
  } else {
    strb(zero_register(tmp), do_not_unlock_if_synchronized);
  }
}

// Lock object
//
// Argument: R1 : Points to BasicObjectLock to be used for locking.
//           Must be initialized with object to lock.
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::lock_object(Register Rlock) {
  assert(Rlock == R1, "the second argument");

  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);
  } else {
    Label done;

    const Register Robj = R2;
    const Register Rmark = R3;
    assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);

    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
    const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();

    Label already_locked, slow_case;

    // Load object pointer
    ldr(Robj, Address(Rlock, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
    }

#ifdef AARCH64
    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Robj);

    // Test if object is already locked
    assert(markOopDesc::unlocked_value == 1, "adjust this code");
    tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);

#else // AARCH64

    // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
    // That would be acceptable as either the CAS or the slow case path is taken in that case.
    // The exception is when the object is locked by the calling thread: then the recursive test will pass
    // (guaranteed, as loads are satisfied from a store queue if performed on the same processor).

    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
    ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes()));

    // Test if object is already locked
    tst(Rmark, markOopDesc::unlocked_value);
    b(already_locked, eq);

#endif // !AARCH64
    // Save old object->mark() into BasicLock's displaced header
    str(Rmark, Address(Rlock, mark_offset));

    cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case);

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr());
    }
#endif // !PRODUCT

    b(done);

    // If we got here, the object is locked by either the calling thread or another thread.
    bind(already_locked);
    // Handling of locked objects: recursive locks and slow case.

    // Fast check for recursive lock.
    //
    // Can apply the optimization only if this is a stack lock
    // allocated in this thread. For efficiency, we can focus on
    // recently allocated stack locks (instead of reading the stack
    // base and checking whether 'mark' points inside the current
    // thread stack):
    // 1) (mark & 3) == 0
    // 2) SP <= mark < SP + os::pagesize()
    //
    // Warning: SP + os::pagesize can overflow the stack base. We must
    // neither apply the optimization for an inflated lock allocated
    // just above the thread stack (this is why condition 1 matters)
    // nor apply the optimization if the stack lock is inside the stack
    // of another thread. The latter is avoided even in case of overflow
    // because we have guard pages at the end of all stacks. Hence, if
    // we go over the stack base and hit the stack of another thread,
    // this should not be in a writeable area that could contain a
    // stack lock allocated by that thread. As a consequence, a stack
    // lock less than page size away from SP is guaranteed to be
    // owned by the current thread.
    //
    // Note: assuming SP is aligned, we can check the low bits of
    // (mark - SP) instead of the low bits of mark. In that case,
    // assuming page size is a power of 2, we can merge the two
    // conditions into a single test:
    // => ((mark - SP) & (3 - os::pagesize())) == 0

#ifdef AARCH64
    // Use the single check since the immediate is OK for AARCH64
    sub(R0, Rmark, Rstack_top);
    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
    Assembler::LogicalImmediate imm(mask, false);
    ands(R0, R0, imm);

    // For recursive case store 0 into lock record.
    // It is harmless to store it unconditionally as lock record contains some garbage
    // value in its _displaced_header field by this moment.
    str(ZR, Address(Rlock, mark_offset));

#else // AARCH64
    // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
    // Check independently the low bits and the distance to SP.
    // -1- test low 2 bits
    movs(R0, AsmOperand(Rmark, lsl, 30));
    // -2- test (mark - SP) if the low two bits are 0
    sub(R0, Rmark, SP, eq);
    movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq);
    // If still 'eq' then recursive locking OK: store 0 into lock record
    str(R0, Address(Rlock, mark_offset), eq);

#endif // AARCH64

#ifndef PRODUCT
    if (PrintBiasedLockingStatistics) {
      cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr());
    }
#endif // !PRODUCT

    b(done, eq);

    bind(slow_case);

    // Call the runtime routine for slow case
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock);

    bind(done);
  }
}


// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
// Argument: R1: Points to BasicObjectLock structure for lock
// Throw an IllegalMonitorException if object is not locked by current thread
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::unlock_object(Register Rlock) {
  assert(Rlock == R1, "the second argument");

  if (UseHeavyMonitors) {
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);
  } else {
    Label done, slow_case;

    const Register Robj = R2;
    const Register Rmark = R3;
    const Register Rresult = R0;
    assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);

    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
    const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
    const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes();

    const Register Rzero = zero_register(Rtemp);

    // Load oop into Robj
    ldr(Robj, Address(Rlock, obj_offset));

    // Free entry
    str(Rzero, Address(Rlock, obj_offset));

    if (UseBiasedLocking) {
      biased_locking_exit(Robj, Rmark, done);
    }

    // Load the old header from BasicLock structure
    ldr(Rmark, Address(Rlock, mark_offset));

    // Test for recursion (zero mark in BasicLock)
    cbz(Rmark, done);

    bool allow_fallthrough_on_failure = true;

    cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure);

    b(done, eq);

    bind(slow_case);

    // Call the runtime routine for slow case.
    str(Robj, Address(Rlock, obj_offset)); // restore obj
    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock);

    bind(done);
  }
}


// Test the method data pointer. If it is null, continue at the specified label.
void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
  cbz(mdp, zero_continue);
}


// Set the method data pointer for the current bcp.
// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
  assert(ProfileInterpreter, "must be profiling interpreter");
  Label set_mdp;

  // Test MDO to avoid the call if it is NULL.
  ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
  cbz(Rtemp, set_mdp);

  mov(R0, Rmethod);
  mov(R1, Rbcp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1);
  // R0/W0: mdi

  // mdo is guaranteed to be non-zero here, we checked for it before the call.
  ldr(Rtemp, Address(Rmethod, Method::method_data_offset()));
  add(Rtemp, Rtemp, in_bytes(MethodData::data_offset()));
  add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0);

  bind(set_mdp);
  str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::verify_method_data_pointer() {
  assert(ProfileInterpreter, "must be profiling interpreter");
#ifdef ASSERT
  Label verify_continue;
  save_caller_save_registers();

  const Register Rmdp = R2;
  test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue

  // If the mdp is valid, it will point to a DataLayout header which is
  // consistent with the bcp. The converse is highly probable also.
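  // Reconstruct the expected bcp from the bci recorded in the DataLayout
  // header plus the method's bytecode base, then compare it with Rbcp.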

  ldrh(R3, Address(Rmdp, DataLayout::bci_offset()));
  ldr(Rtemp, Address(Rmethod, Method::const_offset()));
  add(R3, R3, Rtemp);
  add(R3, R3, in_bytes(ConstMethod::codes_offset()));
  cmp(R3, Rbcp);
  b(verify_continue, eq);

  mov(R0, Rmethod);
  mov(R1, Rbcp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp);

  bind(verify_continue);
  restore_caller_save_registers();
#endif // ASSERT
}


void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, value);
  str(value, Address(mdp_in, offset));
}


// Increments mdp data. Sets bumped_count register to adjusted counter.
void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
                                                      int offset,
                                                      Register bumped_count,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  // Counter address
  Address data(mdp_in, offset);
  assert_different_registers(mdp_in, bumped_count);

  increment_mdp_data_at(data, bumped_count, decrement);
}

void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) {
  assert_different_registers(mdp_in, Rtemp);
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range");

  // Set the flag
  ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
  orr(Rtemp, Rtemp, (unsigned)flag_byte_constant);
  strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset())));
}


// Increments mdp data. Sets bumped_count register to adjusted counter.
void InterpreterMacroAssembler::increment_mdp_data_at(Address data,
                                                      Register bumped_count,
                                                      bool decrement) {
  assert(ProfileInterpreter, "must be profiling interpreter");

  ldr(bumped_count, data);
  if (decrement) {
    // Decrement the register. Set condition codes.
    subs(bumped_count, bumped_count, DataLayout::counter_increment);
    // Avoid overflow.
#ifdef AARCH64
    assert(DataLayout::counter_increment == 1, "required for cinc");
    cinc(bumped_count, bumped_count, pl);
#else
    add(bumped_count, bumped_count, DataLayout::counter_increment, pl);
#endif // AARCH64
  } else {
    // Increment the register. Set condition codes.
    adds(bumped_count, bumped_count, DataLayout::counter_increment);
    // Avoid overflow.
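    // (Profile counters saturate rather than wrap: if the add overflowed
    // into the sign bit ('mi'), the adjustment below pins the counter at
    // the maximum positive value.)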
#ifdef AARCH64
    assert(DataLayout::counter_increment == 1, "required for cinv");
    cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
#else
    sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
#endif // AARCH64
  }
  str(bumped_count, data);
}


void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
                                                 int offset,
                                                 Register value,
                                                 Register test_value_out,
                                                 Label& not_equal_continue) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, test_value_out, value);

  ldr(test_value_out, Address(mdp_in, offset));
  cmp(test_value_out, value);

  b(not_equal_continue, ne);
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, reg_temp);

  ldr(reg_temp, Address(mdp_in, offset_of_disp));
  add(mdp_in, mdp_in, reg_temp);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(mdp_in, reg_offset, reg_tmp);

  ldr(reg_tmp, Address(mdp_in, reg_offset));
  add(mdp_in, mdp_in, reg_tmp);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  add(mdp_in, mdp_in, constant);
  str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize));
}


// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
  assert(ProfileInterpreter, "must be profiling interpreter");
  assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);

  mov(R1, return_bci);
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1);
}


// Sets mdp, bumped_count registers, blows Rtemp.
void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) {
  assert_different_registers(mdp, bumped_count);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    // Otherwise, assign to mdp.
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch. Increment the taken count.
    increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp);

    bind (profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are taking a branch. Increment the not taken count.
    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp);

    // The method data pointer needs to be updated to correspond to the next bytecode.
    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_call(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_final_call(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // We are making a call. Increment the count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) {
  assert_different_registers(mdp, receiver, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    Label skip_receiver_profile;
    if (receiver_can_be_null) {
      Label not_null;
      cbnz(receiver, not_null);
      // We are making a call. Increment the count for the null receiver.
      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);
      b(skip_receiver_profile);
      bind(not_null);
    }

    // Record the receiver type.
    record_klass_in_profile(receiver, mdp, Rtemp, true);
    bind(skip_receiver_profile);

    // The method data pointer needs to be updated to reflect the new target.
    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::record_klass_in_profile_helper(
                                        Register receiver, Register mdp,
                                        Register reg_tmp,
                                        int start_row, Label& done, bool is_virtual_call) {
  if (TypeProfileWidth == 0)
    return;

  assert_different_registers(receiver, mdp, reg_tmp);

  int last_row = VirtualCallData::row_limit() - 1;
  assert(start_row <= last_row, "must be work left to do");
  // Test this row for both the receiver and for null.
  // Take any of three different outcomes:
  //   1. found receiver => increment count and goto done
  //   2. found null => keep looking for case 1, maybe allocate this cell
  //   3. found something else => keep looking for cases 1 and 2
  // Case 3 is handled by a recursive call.
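  // Roughly, the generated scan implements this pseudocode over the
  // VirtualCallData rows (a sketch, not the literal emitted code):
  //   if (receiver == row[n].receiver) { row[n].count++; goto done; }
  //   else if (row[n].receiver == NULL) { row[n] = {receiver, 1}; goto done; }
  //   else retry with row[n+1];                 // the recursive call below
  //   // all rows occupied: for virtual calls, bump the total count instead
  //   // (the polymorphic case).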
  for (int row = start_row; row <= last_row; row++) {
    Label next_test;

    // See if the receiver is receiver[n].
    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));

    test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test);

    // The receiver is receiver[n]. Increment count[n].
    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
    increment_mdp_data_at(mdp, count_offset, reg_tmp);
    b(done);

    bind(next_test);
    // reg_tmp now contains the receiver from the CallData.

    if (row == start_row) {
      Label found_null;
      // Failed the equality check on receiver[n]... Test for null.
      if (start_row == last_row) {
        // The only thing left to do is handle the null case.
        if (is_virtual_call) {
          cbz(reg_tmp, found_null);
          // Receiver did not match any saved receiver and there is no empty row for it.
          // Increment total counter to indicate polymorphic case.
          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp);
          b(done);
          bind(found_null);
        } else {
          cbnz(reg_tmp, done);
        }
        break;
      }
      // Since null is rare, make it be the branch-taken case.
      cbz(reg_tmp, found_null);

      // Put all the "Case 3" tests here.
      record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call);

      // Found a null. Keep searching for a matching receiver,
      // but remember that this is an empty (unused) slot.
      bind(found_null);
    }
  }

  // In the fall-through case, we found no matching receiver, but we
  // observed that receiver[start_row] is NULL.

  // Fill in the receiver field and increment the count.
  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
  set_mdp_data_at(mdp, recvr_offset, receiver);
  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
  mov(reg_tmp, DataLayout::counter_increment);
  set_mdp_data_at(mdp, count_offset, reg_tmp);
  if (start_row > 0) {
    b(done);
  }
}

void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
                                                        Register mdp,
                                                        Register reg_tmp,
                                                        bool is_virtual_call) {
  assert(ProfileInterpreter, "must be profiling");
  assert_different_registers(receiver, mdp, reg_tmp);

  Label done;

  record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call);

  bind(done);
}

// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
  assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);

  if (ProfileInterpreter) {
    Label profile_continue;
    uint row;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the total ret count.
    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp);

    for (row = 0; row < RetData::row_limit(); row++) {
      Label next_test;

      // See if return_bci is equal to bci[n]:
      test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci,
                       Rtemp, next_test);

      // return_bci is equal to bci[n]. Increment the count.
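      // Matching row: bump bci_count[n], then (just below) advance the mdp by
      // this row's displacement so it points at the profile data of the ret
      // target.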
      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp);

      // The method data pointer needs to be updated to reflect the new target.
      update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp);
      b(profile_continue);
      bind(next_test);
    }

    update_mdp_for_ret(return_bci);

    bind(profile_continue);
  }
}


// Sets mdp.
void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter && TypeProfileCasts) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    int count_offset = in_bytes(CounterData::count_offset());
    // Back up the address, since we have already bumped the mdp.
    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());

    // *Decrement* the counter. We expect to see zero or small negatives.
    increment_mdp_data_at(mdp, count_offset, Rtemp, true);

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass) {
  assert_different_registers(mdp, klass, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // The method data pointer needs to be updated.
    int mdp_delta = in_bytes(BitData::bit_data_size());
    if (TypeProfileCasts) {
      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());

      // Record the object type.
      record_klass_in_profile(klass, mdp, Rtemp, false);
    }
    update_mdp_by_constant(mdp, mdp_delta);

    bind(profile_continue);
  }
}


// Sets mdp, blows Rtemp.
void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
  assert_different_registers(mdp, Rtemp);

  if (ProfileInterpreter) {
    Label profile_continue;

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Update the default case count.
    increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp);

    // The method data pointer needs to be updated.
    update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp);

    bind(profile_continue);
  }
}


// Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2.
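// The case cell is addressed as (a sketch of the arithmetic emitted below):
//   mdp + case_array_offset() + index * per_case_size()
//       + relative_count_offset()         // for the bumped counter
//       + relative_displacement_offset()  // for the new mdp displacement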
void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) {
  assert_different_registers(mdp, reg_tmp1, reg_tmp2);
  assert_different_registers(mdp, reg_tmp1, index);

  if (ProfileInterpreter) {
    Label profile_continue;

    const int count_offset = in_bytes(MultiBranchData::case_array_offset()) +
                             in_bytes(MultiBranchData::relative_count_offset());

    const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) +
                                    in_bytes(MultiBranchData::relative_displacement_offset());

    // If no method data exists, go to profile_continue.
    test_method_data_pointer(mdp, profile_continue);

    // Build the base (index * per_case_size_in_bytes()).
    logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));

    // Update the case count.
    add(reg_tmp1, reg_tmp1, count_offset);
    increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2);

    // The method data pointer needs to be updated.
    add(reg_tmp1, reg_tmp1, displacement_offset - count_offset);
    update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2);

    bind(profile_continue);
  }
}


void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
#ifdef AARCH64
  rev_w(r, r);
#else
  if (VM_Version::supports_rev()) {
    rev(r, r);
  } else {
    // REV is unavailable: byte-reverse r with the standard eor/ror sequence
    // (tmp = r ^ ror(r,16); tmp = (tmp >> 8) & 0xffff00ff; r = tmp ^ ror(r,8)).
    eor(rtmp1, r, AsmOperand(r, ror, 16));
    mvn(rtmp2, 0x0000ff00);
    andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
    eor(r, rtmp1, AsmOperand(r, ror, 8));
  }
#endif // AARCH64
}


void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) {
  const intx addr = (intx) (address_of_counter + offset);

  assert((addr & 0x3) == 0, "address of counter should be aligned");
  const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));

  const address base = (address) (addr & ~offset_mask);
  const int offs = (int) (addr & offset_mask);

  const Register addr_base = tmp1;
  const Register val = tmp2;

  mov_slow(addr_base, base);
  ldr_s32(val, Address(addr_base, offs));

  if (avoid_overflow) {
    adds_32(val, val, 1);
#ifdef AARCH64
    Label L;
    b(L, mi);
    str_32(val, Address(addr_base, offs));
    bind(L);
#else
    str(val, Address(addr_base, offs), pl);
#endif // AARCH64
  } else {
    add_32(val, val, 1);
    str_32(val, Address(addr_base, offs));
  }
}

void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) {
  if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); }
}

// Inline assembly for:
//
// if (thread is in interp_only_mode) {
//   InterpreterRuntime::post_method_entry();
// }
// if (DTraceMethodProbes) {
//   SharedRuntime::dtrace_method_entry(method, receiver);
// }
// if (log_is_enabled(Trace, redefine, class, obsolete)) {
//   SharedRuntime::rc_trace_method_entry(method, receiver);
// }

void InterpreterMacroAssembler::notify_method_entry() {
  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to
  // track stack depth. If it is possible to enter interp_only_mode we add
  // the code to check if the event should be sent.
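  // Note: can_post_interpreter_events() is evaluated once, at code generation
  // time; the interp_only_mode load emitted below is the per-entry runtime check.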
  if (can_post_interpreter_events()) {
    Label L;

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
    cbz(Rtemp, L);

    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));

    bind(L);
  }

  // Note: for now, the DTrace runtime check is only generated when
  // DTraceMethodProbes is set at code generation time, to eliminate
  // overhead on each method entry.
  if (DTraceMethodProbes) {
    Label Lcontinue;

    ldrb_global(Rtemp, (address)&DTraceMethodProbes);
    cbz(Rtemp, Lcontinue);

    mov(R0, Rthread);
    mov(R1, Rmethod);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1);

    bind(Lcontinue);
  }

  // RedefineClasses() tracing support for obsolete method entry.
  if (log_is_enabled(Trace, redefine, class, obsolete)) {
    mov(R0, Rthread);
    mov(R1, Rmethod);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
                 R0, R1);
  }
}


void InterpreterMacroAssembler::notify_method_exit(
    TosState state, NotifyMethodExitMode mode,
    bool native, Register result_lo, Register result_hi, FloatRegister result_fp) {
  // Whenever JVMTI is in interp_only_mode, method entry/exit events are sent to
  // track stack depth. If it is possible to enter interp_only_mode we add
  // the code to check if the event should be sent.
  if (mode == NotifyJVMTI && can_post_interpreter_events()) {
    Label L;
    // Note: frame::interpreter_frame_result has a dependency on how the
    // method result is saved across the call to post_method_exit. If this
    // is changed then the interpreter_frame_result implementation will
    // need to be updated too.

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
    cbz(Rtemp, L);

    if (native) {
      // For the C++ and template interpreters, push both result registers on
      // the stack in the native case, since we don't know the state.
      // On AArch64, the result registers are stored into the frame at known locations.
      // See frame::interpreter_frame_result for code that gets the result values from here.
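      // In outline, the code emitted below is (a sketch):
      //   save result registers (GP and, where applicable, FP);
      //   call_VM(InterpreterRuntime::post_method_exit);
      //   restore result registers;
      // call_VM may clobber them, and JVMTI reads the result through
      // frame::interpreter_frame_result.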
      assert(result_lo != noreg, "result registers should be defined");

#ifdef AARCH64
      assert(result_hi == noreg, "result_hi is not used on AArch64");
      assert(result_fp != fnoreg, "FP result register must be defined");

      str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      assert(result_hi != noreg, "result registers should be defined");

#ifdef __ABI_HARD__
      assert(result_fp != fnoreg, "FP result register must be defined");
      sub(SP, SP, 2 * wordSize);
      fstd(result_fp, Address(SP));
#endif // __ABI_HARD__

      push(RegisterSet(result_lo) | RegisterSet(result_hi));
#endif // AARCH64

      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));

#ifdef AARCH64
      ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      pop(RegisterSet(result_lo) | RegisterSet(result_hi));
#ifdef __ABI_HARD__
      fldd(result_fp, Address(SP));
      add(SP, SP, 2 * wordSize);
#endif // __ABI_HARD__
#endif // AARCH64

    } else {
      // For the template interpreter, the value on tos is the size of the
      // state. (The C++ interpreter calls JVMTI elsewhere.)
      push(state);
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
      pop(state);
    }

    bind(L);
  }

  // Note: for now, the DTrace runtime check is only generated when
  // DTraceMethodProbes is set at code generation time, to eliminate
  // overhead on each method exit.
  if (DTraceMethodProbes) {
    Label Lcontinue;

    ldrb_global(Rtemp, (address)&DTraceMethodProbes);
    cbz(Rtemp, Lcontinue);

    push(state);

    mov(R0, Rthread);
    mov(R1, Rmethod);

    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1);

    pop(state);

    bind(Lcontinue);
  }
}


#ifndef PRODUCT

void InterpreterMacroAssembler::trace_state(const char* msg) {
  int push_size = save_caller_save_registers();

  Label Lcontinue;
  InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n");
  InlinedString Lmsg(msg);
  InlinedAddress Lprintf((address)printf);

  ldr_literal(R0, Lmsg0);
  ldr_literal(R1, Lmsg);
  mov(R2, FP);
  add(R3, SP, push_size); // original SP (without saved registers)
  ldr_literal(Rtemp, Lprintf);
  call(Rtemp);

  b(Lcontinue);

  bind_literal(Lmsg0);
  bind_literal(Lmsg);
  bind_literal(Lprintf);

  bind(Lcontinue);

  restore_caller_save_registers();
}

#endif

// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
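// Equivalent pseudocode (a sketch):
//   *counter_addr += increment;
//   if ((*counter_addr & *mask_addr) <cond> 0) goto *where;
// e.g. with cond == eq the branch is taken whenever the incremented counter
// has all mask bits clear, giving a cheap periodic trigger.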
void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
                                                        int increment, Address mask_addr,
                                                        Register scratch, Register scratch2,
                                                        AsmCondition cond, Label* where) {
  // Caution: scratch2 and the base address of counter_addr can be the same.
  assert_different_registers(scratch, scratch2);
  ldr_u32(scratch, counter_addr);
  add(scratch, scratch, increment);
  str_32(scratch, counter_addr);

#ifdef AARCH64
  ldr_u32(scratch2, mask_addr);
  ands_w(ZR, scratch, scratch2);
#else
  ldr(scratch2, mask_addr);
  andrs(scratch, scratch, scratch2);
#endif // AARCH64
  b(*where, cond);
}

void InterpreterMacroAssembler::get_method_counters(Register method,
                                                    Register Rcounters,
                                                    Label& skip) {
  const Address method_counters(method, Method::method_counters_offset());
  Label has_counters;

  ldr(Rcounters, method_counters);
  cbnz(Rcounters, has_counters);

#ifdef AARCH64
  const Register tmp = Rcounters;
  const int saved_regs_size = 20*wordSize;

  // Note: call_VM will cut SP according to the Rstack_top value before the call,
  // and restore SP to the extended_sp value from the frame after the call.
  // So make sure there is enough stack space to save registers and adjust Rstack_top accordingly.
  {
    Label enough_stack_space;
    check_extended_sp(tmp);
    sub(Rstack_top, Rstack_top, saved_regs_size);
    cmp(SP, Rstack_top);
    b(enough_stack_space, ls);

    align_reg(tmp, Rstack_top, StackAlignmentInBytes);
    mov(SP, tmp);
    str(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize));

    bind(enough_stack_space);
    check_stack_top();

    int offset = 0;
    stp(R0,  R1,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R2,  R3,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R4,  R5,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R6,  R7,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R8,  R9,  Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
    stp(R18, LR,  Address(Rstack_top, offset)); offset += 2*wordSize;
    assert(offset == saved_regs_size, "should be");
  }
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
#endif // AARCH64

  mov(R1, method);
  call_VM(noreg, CAST_FROM_FN_PTR(address,
                                  InterpreterRuntime::build_method_counters), R1);

#ifdef AARCH64
  {
    int offset = 0;
    ldp(R0,  R1,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R2,  R3,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R4,  R5,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R6,  R7,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R8,  R9,  Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize;
    ldp(R18, LR,  Address(Rstack_top, offset)); offset += 2*wordSize;
    assert(offset == saved_regs_size, "should be");

    add(Rstack_top, Rstack_top, saved_regs_size);
  }
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14));
#endif // AARCH64

  ldr(Rcounters, method_counters);
  cbz(Rcounters, skip); // No MethodCounters created: OutOfMemory

  bind(has_counters);
}