/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// this was subsequently modified to its present name and return type
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
};

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr);
  cmp(super_klass, temp_reg); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp   = temp_reg;
  Register scan_temp  = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key is the
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9).
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
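// The thread (Rthread) is passed in R0 as the implicit argument and
// last_Java_frame is set before and reset after the call; no exception check
// or oop-result handling is done here. A hedged usage sketch (the entry name
// below is a placeholder for illustration, not a real VM entry point):
//   __ call_VM(CAST_FROM_FN_PTR(address, some_runtime_entry), true /* save R9 */);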
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns number of instructions of mov_slow pattern,
// generating it if non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.
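  // A few illustrative encodings (example constants chosen for this comment only):
  //   c = 0x0000000012340000 -> 1 instruction:  movz rd, #0x1234, lsl #16
  //   c = 0xffffffffffff1234 -> 1 instruction:  movn rd, #0xedcb, lsl #0
  //   c = 0x0000123400005678 -> 2 instructions: movz rd, #0x5678, lsl #0
  //                                             movk rd, #0x1234, lsl #32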

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}

// Counts instructions generated by mov_slow(rd, c).
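// (Presumably used where the emitted size must be known up front; the helper
// runs with a NULL MacroAssembler so nothing is actually emitted.)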
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}

#endif // AARCH64

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false,"TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert ((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: uxtb(rd, rn); break;
    case 16: uxth(rd, rn); break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);       // oop to verify
  }
  mov(R1, SP);          // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);        // oop to verify
  mov(R1, SP);          // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg); // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
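// The fast path is a bump-the-pointer allocation: load the current heap top,
// compute obj_end = obj + size, check for address wrap-around and against the
// heap end, then try to install the new top with a CAS (LL/SC on AArch64),
// retrying on contention. Anything that cannot be handled inline branches to
// `slow_case`.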
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (an unlikely scenario
  // on typical embedded systems as it means >4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
  // registers. Such a request could be difficult to satisfy by allocating those
  // registers at a higher level, therefore the routine is ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3

  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));

  pop(RegisterSet(low, high));

  bind(done);
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg); // message
  mov(R1, SP);           // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug); // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);  // message
  ldr_literal(LR, Lwarn); // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
  _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}

#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64; this parameter is provided solely for
// better compatibility with 32-bit ARM.
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions).
// preserve_flags takes a longer path in the LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
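// (In the pure LoadStore case the code below avoids a dmb entirely: it compares
// load_tgt with itself and emits a never-taken conditional branch, so subsequent
// stores become control-dependent on the preceding load and cannot be speculated
// ahead of it.)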
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: reordering of speculative stores is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}

#endif // AARCH64

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT


// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
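// (On success the condition codes indicate 'eq' - see cas_for_lock_acquire above -
// which is what the ASSERT-only breakpoint(ne) below relies on.)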
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if biased locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark load, if we need to load it.
  // If the mark has already been loaded into swap_reg then the check has
  // already been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On an MP platform, loads could return 'stale' values in some cases.
  // That is acceptable since either the CAS or the slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
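// The check below is branch-free over the individual fields: the klass prototype header
// is OR'ed with the current thread and XOR'ed with the mark word,
//   tmp_reg = (prototype | Rthread) ^ mark
// which leaves zero in the owner, epoch and pattern bits only when all of them already
// match; the age bits are then masked out before testing. (Restatement of the code below.)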
1955 load_klass(tmp_reg, obj_reg); 1956 ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 1957 orr(tmp_reg, tmp_reg, Rthread); 1958 eor(tmp_reg, tmp_reg, swap_reg); 1959 1960 #ifdef AARCH64 1961 ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); 1962 #else 1963 bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); 1964 #endif // AARCH64 1965 1966 #ifndef PRODUCT 1967 if (counters != NULL) { 1968 cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); 1969 } 1970 #endif // !PRODUCT 1971 1972 b(done, eq); 1973 1974 Label try_revoke_bias; 1975 Label try_rebias; 1976 1977 // At this point we know that the header has the bias pattern and 1978 // that we are not the bias owner in the current epoch. We need to 1979 // figure out more details about the state of the header in order to 1980 // know what operations can be legally performed on the object's 1981 // header. 1982 1983 // If the low three bits in the xor result aren't clear, that means 1984 // the prototype header is no longer biased and we have to revoke 1985 // the bias on this object. 1986 tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 1987 b(try_revoke_bias, ne); 1988 1989 // Biasing is still enabled for this data type. See whether the 1990 // epoch of the current bias is still valid, meaning that the epoch 1991 // bits of the mark word are equal to the epoch bits of the 1992 // prototype header. (Note that the prototype header's epoch bits 1993 // only change at a safepoint.) If not, attempt to rebias the object 1994 // toward the current thread. Note that we must be absolutely sure 1995 // that the current epoch is invalid in order to do this because 1996 // otherwise the manipulations it performs on the mark word are 1997 // illegal. 1998 tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); 1999 b(try_rebias, ne); 2000 2001 // tmp_reg has the age, epoch and pattern bits cleared 2002 // The remaining (owner) bits are (Thread ^ current_owner) 2003 2004 // The epoch of the current bias is still valid but we know nothing 2005 // about the owner; it might be set or it might be clear. Try to 2006 // acquire the bias of the object using an atomic operation. If this 2007 // fails we will go in to the runtime to revoke the object's bias. 2008 // Note that we first construct the presumed unbiased header so we 2009 // don't accidentally blow away another thread's valid bias. 2010 2011 // Note that we know the owner is not ourself. Hence, success can 2012 // only happen when the owner bits is 0 2013 2014 #ifdef AARCH64 2015 // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has 2016 // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
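// (Illustrative note: on 32-bit ARM the combined mask is the contiguous value 0x1ff, as
// the assert below documents, so it can be cleared with a pair of shifts; on AArch64 the
// mark word layout leaves a hole (the cms bit) inside the mask, hence the explicit mov
// into tmp2 followed by andr.)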
2017 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2018 andr(swap_reg, swap_reg, tmp2); 2019 #else 2020 // until the assembler can be made smarter, we need to make some assumptions about the values 2021 // so we can optimize this: 2022 assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); 2023 2024 mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); 2025 mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) 2026 #endif // AARCH64 2027 2028 orr(tmp_reg, swap_reg, Rthread); // new mark 2029 2030 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2031 (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); 2032 2033 // If the biasing toward our thread failed, this means that 2034 // another thread succeeded in biasing it toward itself and we 2035 // need to revoke that bias. The revocation will occur in the 2036 // interpreter runtime in the slow case. 2037 2038 b(done); 2039 2040 bind(try_rebias); 2041 2042 // At this point we know the epoch has expired, meaning that the 2043 // current "bias owner", if any, is actually invalid. Under these 2044 // circumstances _only_, we are allowed to use the current header's 2045 // value as the comparison value when doing the cas to acquire the 2046 // bias in the current epoch. In other words, we allow transfer of 2047 // the bias from one thread to another directly in this situation. 2048 2049 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2050 2051 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2052 2053 // owner bits 'random'. Set them to Rthread. 2054 #ifdef AARCH64 2055 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2056 andr(tmp_reg, tmp_reg, tmp2); 2057 #else 2058 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2059 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2060 #endif // AARCH64 2061 2062 orr(tmp_reg, tmp_reg, Rthread); // new mark 2063 2064 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2065 (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); 2066 2067 // If the biasing toward our thread failed, then another thread 2068 // succeeded in biasing it toward itself and we need to revoke that 2069 // bias. The revocation will occur in the runtime in the slow case. 2070 2071 b(done); 2072 2073 bind(try_revoke_bias); 2074 2075 // The prototype mark in the klass doesn't have the bias bit set any 2076 // more, indicating that objects of this data type are not supposed 2077 // to be biased any more. We are going to try to reset the mark of 2078 // this object to the prototype value and fall through to the 2079 // CAS-based locking scheme. Note that if our CAS fails, it means 2080 // that another thread raced us for the privilege of revoking the 2081 // bias of this particular object, so it's okay to continue in the 2082 // normal locking code. 2083 2084 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2085 2086 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2087 2088 // owner bits 'random'. 
Clear them 2089 #ifdef AARCH64 2090 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2091 andr(tmp_reg, tmp_reg, tmp2); 2092 #else 2093 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2094 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2095 #endif // AARCH64 2096 2097 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, 2098 (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); 2099 2100 // Fall through to the normal CAS-based lock, because no matter what 2101 // the result of the above CAS, some thread must have succeeded in 2102 // removing the bias bit from the object's header. 2103 2104 bind(cas_label); 2105 2106 return null_check_offset; 2107 } 2108 2109 2110 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { 2111 assert(UseBiasedLocking, "why call this otherwise?"); 2112 2113 // Check for biased locking unlock case, which is a no-op 2114 // Note: we do not have to check the thread ID for two reasons. 2115 // First, the interpreter checks for IllegalMonitorStateException at 2116 // a higher level. Second, if the bias was revoked while we held the 2117 // lock, the object could not be rebiased toward another thread, so 2118 // the bias bit would be clear. 2119 ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2120 2121 andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2122 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2123 b(done, eq); 2124 } 2125 2126 2127 void MacroAssembler::resolve_jobject(Register value, 2128 Register tmp1, 2129 Register tmp2) { 2130 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2131 2132 assert_different_registers(value, tmp1, tmp2); 2133 Label done, not_weak; 2134 cbz(value, done); // Use NULL as-is. 2135 STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); 2136 tbz(value, 0, not_weak); // Test for jweak tag. 2137 2138 // Resolve jweak. 2139 bs->load_at(this, IN_ROOT | ON_PHANTOM_OOP_REF, T_OBJECT, 2140 value, Address(value, -JNIHandles::weak_tag_value), tmp1, tmp2, noreg); 2141 b(done); 2142 bind(not_weak); 2143 // Resolve (untagged) jobject. 2144 bs->load_at(this, IN_ROOT | IN_CONCURRENT_ROOT, T_OBJECT, 2145 value, Address(value, 0), tmp1, tmp2, noreg); 2146 verify_oop(value); 2147 bind(done); 2148 } 2149 2150 2151 ////////////////////////////////////////////////////////////////////////////////// 2152 2153 #ifdef AARCH64 2154 2155 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2156 switch (size_in_bytes) { 2157 case 8: ldr(dst, src); break; 2158 case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; 2159 case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; 2160 case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; 2161 default: ShouldNotReachHere(); 2162 } 2163 } 2164 2165 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 2166 switch (size_in_bytes) { 2167 case 8: str(src, dst); break; 2168 case 4: str_32(src, dst); break; 2169 case 2: strh(src, dst); break; 2170 case 1: strb(src, dst); break; 2171 default: ShouldNotReachHere(); 2172 } 2173 } 2174 2175 #else 2176 2177 void MacroAssembler::load_sized_value(Register dst, Address src, 2178 size_t size_in_bytes, bool is_signed, AsmCondition cond) { 2179 switch (size_in_bytes) { 2180 case 4: ldr(dst, src, cond); break; 2181 case 2: is_signed ? 
ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; 2182 case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; 2183 default: ShouldNotReachHere(); 2184 } 2185 } 2186 2187 2188 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { 2189 switch (size_in_bytes) { 2190 case 4: str(src, dst, cond); break; 2191 case 2: strh(src, dst, cond); break; 2192 case 1: strb(src, dst, cond); break; 2193 default: ShouldNotReachHere(); 2194 } 2195 } 2196 #endif // AARCH64 2197 2198 // Look up the method for a megamorphic invokeinterface call. 2199 // The target method is determined by <Rinterf, Rindex>. 2200 // The receiver klass is in Rklass. 2201 // On success, the result will be in method_result, and execution falls through. 2202 // On failure, execution transfers to the given label. 2203 void MacroAssembler::lookup_interface_method(Register Rklass, 2204 Register Rintf, 2205 RegisterOrConstant itable_index, 2206 Register method_result, 2207 Register Rscan, 2208 Register Rtmp, 2209 Label& L_no_such_interface) { 2210 2211 assert_different_registers(Rklass, Rintf, Rscan, Rtmp); 2212 2213 const int entry_size = itableOffsetEntry::size() * HeapWordSize; 2214 assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience"); 2215 2216 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 2217 const int base = in_bytes(Klass::vtable_start_offset()); 2218 const int scale = exact_log2(vtableEntry::size_in_bytes()); 2219 ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable 2220 add(Rscan, Rklass, base); 2221 add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale)); 2222 2223 // Search through the itable for an interface equal to incoming Rintf 2224 // itable looks like [intface][offset][intface][offset][intface][offset] 2225 2226 Label loop; 2227 bind(loop); 2228 ldr(Rtmp, Address(Rscan, entry_size, post_indexed)); 2229 #ifdef AARCH64 2230 Label found; 2231 cmp(Rtmp, Rintf); 2232 b(found, eq); 2233 cbnz(Rtmp, loop); 2234 #else 2235 cmp(Rtmp, Rintf); // set ZF and CF if interface is found 2236 cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is 2237 b(loop, ne); 2238 #endif // AARCH64 2239 2240 #ifdef AARCH64 2241 b(L_no_such_interface); 2242 bind(found); 2243 #else 2244 // CF == 0 means we reached the end of itable without finding icklass 2245 b(L_no_such_interface, cc); 2246 #endif // !AARCH64 2247 2248 if (method_result != noreg) { 2249 // Interface found at previous position of Rscan, now load the method 2250 ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size)); 2251 if (itable_index.is_register()) { 2252 add(Rtmp, Rtmp, Rklass); // Add offset to Klass* 2253 assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); 2254 assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); 2255 ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register())); 2256 } else { 2257 int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() + 2258 itableMethodEntry::method_offset_in_bytes(); 2259 add_slow(method_result, Rklass, method_offset); 2260 ldr(method_result, Address(method_result, Rtmp)); 2261 } 2262 } 2263 } 2264 2265 #ifdef COMPILER2 2266 // TODO: 8 bytes at a time? pre-fetch? 2267 // Compare char[] arrays aligned to 4 bytes. 
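// Rough shape of the comparison, restating the code below: if the byte count is not a
// multiple of 4, the single trailing char is compared first with ldrh; the remaining
// chars are then compared two at a time with 32-bit loads, walking a negative index up
// towards zero. On any mismatch 'result' is set to 0 and control branches to Ldone;
// on fall-through the caller is expected to set 'result' to 1 (equal).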
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character ?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);

  // only one char ?
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1); //equal
}
#endif

void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);            // 1 if '>', else 0
  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
#else
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}

void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif

// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
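// The offsets are measured from both ends of the code cache, so the answer remains valid
// wherever the code is later relocated within the cache. On 32-bit ARM the +/-32MB branch
// range is validated via check26() with an 8-byte adjustment for the PC-relative encoding
// (PC reads as the instruction address plus 8); on AArch64 the offsets are checked against
// the signed immediate range of B/BL.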
2380 bool MacroAssembler::_reachable_from_cache(address target) { 2381 #ifdef __thumb__ 2382 if ((1 & (intptr_t)target) != 0) { 2383 // Return false to avoid 'b' if we need switching to THUMB mode. 2384 return false; 2385 } 2386 #endif 2387 2388 address cl = first_cache_address(); 2389 address ch = last_cache_address(); 2390 2391 if (ForceUnreachable) { 2392 // Only addresses from CodeCache can be treated as reachable. 2393 if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) { 2394 return false; 2395 } 2396 } 2397 2398 intptr_t loffset = (intptr_t)target - (intptr_t)cl; 2399 intptr_t hoffset = (intptr_t)target - (intptr_t)ch; 2400 2401 #ifdef AARCH64 2402 return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); 2403 #else 2404 return check26(loffset - 8) && check26(hoffset - 8); 2405 #endif 2406 } 2407 2408 bool MacroAssembler::reachable_from_cache(address target) { 2409 assert(CodeCache::contains(pc()), "not supported"); 2410 return _reachable_from_cache(target); 2411 } 2412 2413 // Can we reach the entire code cache from anywhere else in the code cache? 2414 bool MacroAssembler::_cache_fully_reachable() { 2415 address cl = first_cache_address(); 2416 address ch = last_cache_address(); 2417 return _reachable_from_cache(cl) && _reachable_from_cache(ch); 2418 } 2419 2420 bool MacroAssembler::cache_fully_reachable() { 2421 assert(CodeCache::contains(pc()), "not supported"); 2422 return _cache_fully_reachable(); 2423 } 2424 2425 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2426 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2427 if (reachable_from_cache(target)) { 2428 relocate(rtype); 2429 b(target NOT_AARCH64_ARG(cond)); 2430 return; 2431 } 2432 2433 // Note: relocate is not needed for the code below, 2434 // encoding targets in absolute format. 2435 if (ignore_non_patchable_relocations()) { 2436 rtype = relocInfo::none; 2437 } 2438 2439 #ifdef AARCH64 2440 assert (scratch != noreg, "should be specified"); 2441 InlinedAddress address_literal(target, rtype); 2442 ldr_literal(scratch, address_literal); 2443 br(scratch); 2444 int off = offset(); 2445 bind_literal(address_literal); 2446 #ifdef COMPILER2 2447 if (offset() - off == wordSize) { 2448 // no padding, so insert nop for worst-case sizing 2449 nop(); 2450 } 2451 #endif 2452 #else 2453 if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { 2454 // Note: this version cannot be (atomically) patched 2455 mov_slow(scratch, (intptr_t)target, cond); 2456 bx(scratch, cond); 2457 } else { 2458 Label skip; 2459 InlinedAddress address_literal(target); 2460 if (cond != al) { 2461 b(skip, inverse(cond)); 2462 } 2463 relocate(rtype); 2464 ldr_literal(PC, address_literal); 2465 bind_literal(address_literal); 2466 bind(skip); 2467 } 2468 #endif // AARCH64 2469 } 2470 2471 // Similar to jump except that: 2472 // - near calls are valid only if any destination in the cache is near 2473 // - no movt/movw (not atomically patchable) 2474 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2475 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2476 if (cache_fully_reachable()) { 2477 // Note: this assumes that all possible targets (the initial one 2478 // and the addressed patched to) are all in the code cache. 
2479 assert(CodeCache::contains(target), "target might be too far"); 2480 relocate(rtype); 2481 b(target NOT_AARCH64_ARG(cond)); 2482 return; 2483 } 2484 2485 // Discard the relocation information if not needed for CacheCompiledCode 2486 // since the next encodings are all in absolute format. 2487 if (ignore_non_patchable_relocations()) { 2488 rtype = relocInfo::none; 2489 } 2490 2491 #ifdef AARCH64 2492 assert (scratch != noreg, "should be specified"); 2493 InlinedAddress address_literal(target); 2494 relocate(rtype); 2495 ldr_literal(scratch, address_literal); 2496 br(scratch); 2497 int off = offset(); 2498 bind_literal(address_literal); 2499 #ifdef COMPILER2 2500 if (offset() - off == wordSize) { 2501 // no padding, so insert nop for worst-case sizing 2502 nop(); 2503 } 2504 #endif 2505 #else 2506 { 2507 Label skip; 2508 InlinedAddress address_literal(target); 2509 if (cond != al) { 2510 b(skip, inverse(cond)); 2511 } 2512 relocate(rtype); 2513 ldr_literal(PC, address_literal); 2514 bind_literal(address_literal); 2515 bind(skip); 2516 } 2517 #endif // AARCH64 2518 } 2519 2520 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { 2521 Register scratch = LR; 2522 assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); 2523 if (reachable_from_cache(target)) { 2524 relocate(rspec); 2525 bl(target NOT_AARCH64_ARG(cond)); 2526 return; 2527 } 2528 2529 // Note: relocate is not needed for the code below, 2530 // encoding targets in absolute format. 2531 if (ignore_non_patchable_relocations()) { 2532 // This assumes the information was needed only for relocating the code. 2533 rspec = RelocationHolder::none; 2534 } 2535 2536 #ifndef AARCH64 2537 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { 2538 // Note: this version cannot be (atomically) patched 2539 mov_slow(scratch, (intptr_t)target, cond); 2540 blx(scratch, cond); 2541 return; 2542 } 2543 #endif 2544 2545 { 2546 Label ret_addr; 2547 #ifndef AARCH64 2548 if (cond != al) { 2549 b(ret_addr, inverse(cond)); 2550 } 2551 #endif 2552 2553 2554 #ifdef AARCH64 2555 // TODO-AARCH64: make more optimal implementation 2556 // [ Keep in sync with MacroAssembler::call_size ] 2557 assert(rspec.type() == relocInfo::none, "call reloc not implemented"); 2558 mov_slow(scratch, target); 2559 blr(scratch); 2560 #else 2561 InlinedAddress address_literal(target); 2562 relocate(rspec); 2563 adr(LR, ret_addr); 2564 ldr_literal(PC, address_literal); 2565 2566 bind_literal(address_literal); 2567 bind(ret_addr); 2568 #endif 2569 } 2570 } 2571 2572 #if defined(AARCH64) && defined(COMPILER2) 2573 int MacroAssembler::call_size(address target, bool far, bool patchable) { 2574 // FIXME: mov_slow is variable-length 2575 if (!far) return 1; // bl 2576 if (patchable) return 2; // ldr; blr 2577 return instr_count_for_mov_slow((intptr_t)target) + 1; 2578 } 2579 #endif 2580 2581 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { 2582 assert(rspec.type() == relocInfo::static_call_type || 2583 rspec.type() == relocInfo::none || 2584 rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); 2585 2586 // Always generate the relocation information, needed for patching 2587 relocate(rspec); // used by NativeCall::is_call_before() 2588 if (cache_fully_reachable()) { 2589 // Note: this assumes that all possible targets (the initial one 2590 // and the addresses patched to) are all in the code cache. 
2591 assert(CodeCache::contains(target), "target might be too far"); 2592 bl(target); 2593 } else { 2594 #if defined(AARCH64) && defined(COMPILER2) 2595 if (c2) { 2596 // return address needs to match call_size(). 2597 // no need to trash Rtemp 2598 int off = offset(); 2599 Label skip_literal; 2600 InlinedAddress address_literal(target); 2601 ldr_literal(LR, address_literal); 2602 blr(LR); 2603 int ret_addr_offset = offset(); 2604 assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); 2605 b(skip_literal); 2606 int off2 = offset(); 2607 bind_literal(address_literal); 2608 if (offset() - off2 == wordSize) { 2609 // no padding, so insert nop for worst-case sizing 2610 nop(); 2611 } 2612 bind(skip_literal); 2613 return ret_addr_offset; 2614 } 2615 #endif 2616 Label ret_addr; 2617 InlinedAddress address_literal(target); 2618 #ifdef AARCH64 2619 ldr_literal(Rtemp, address_literal); 2620 adr(LR, ret_addr); 2621 br(Rtemp); 2622 #else 2623 adr(LR, ret_addr); 2624 ldr_literal(PC, address_literal); 2625 #endif 2626 bind_literal(address_literal); 2627 bind(ret_addr); 2628 } 2629 return offset(); 2630 } 2631 2632 // ((OopHandle)result).resolve(); 2633 void MacroAssembler::resolve_oop_handle(Register result) { 2634 // OopHandle::resolve is an indirection. 2635 ldr(result, Address(result, 0)); 2636 } 2637 2638 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { 2639 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2640 ldr(tmp, Address(method, Method::const_offset())); 2641 ldr(tmp, Address(tmp, ConstMethod::constants_offset())); 2642 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); 2643 ldr(mirror, Address(tmp, mirror_offset)); 2644 resolve_oop_handle(mirror); 2645 } 2646 2647 2648 /////////////////////////////////////////////////////////////////////////////// 2649 2650 // Compressed pointers 2651 2652 #ifdef AARCH64 2653 2654 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { 2655 if (UseCompressedClassPointers) { 2656 ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2657 decode_klass_not_null(dst_klass); 2658 } else { 2659 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2660 } 2661 } 2662 2663 #else 2664 2665 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { 2666 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); 2667 } 2668 2669 #endif // AARCH64 2670 2671 // Blows src_klass. 2672 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { 2673 #ifdef AARCH64 2674 if (UseCompressedClassPointers) { 2675 assert(src_klass != dst_oop, "not enough registers"); 2676 encode_klass_not_null(src_klass); 2677 str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2678 return; 2679 } 2680 #endif // AARCH64 2681 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2682 } 2683 2684 #ifdef AARCH64 2685 2686 void MacroAssembler::store_klass_gap(Register dst) { 2687 if (UseCompressedClassPointers) { 2688 str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2689 } 2690 } 2691 2692 #endif // AARCH64 2693 2694 2695 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { 2696 access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3); 2697 } 2698 2699 // Blows src and flags. 
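// These heap-oop helpers simply forward to access_store_at/access_load_at with IN_HEAP
// added to the caller's decorators; the actual barrier code is emitted by the
// BarrierSetAssembler of the active GC (for example card marking or G1 pre/post barriers),
// unless AS_RAW is set, in which case the plain BarrierSetAssembler implementation is used.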
2700 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { 2701 access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false); 2702 } 2703 2704 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { 2705 access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true); 2706 } 2707 2708 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, 2709 Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) { 2710 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2711 bool as_raw = (decorators & AS_RAW) != 0; 2712 if (as_raw) { 2713 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); 2714 } else { 2715 bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); 2716 } 2717 } 2718 2719 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, 2720 Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) { 2721 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 2722 bool as_raw = (decorators & AS_RAW) != 0; 2723 if (as_raw) { 2724 bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null); 2725 } else { 2726 bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null); 2727 } 2728 } 2729 2730 2731 #ifdef AARCH64 2732 2733 // Algorithm must match oop.inline.hpp encode_heap_oop. 2734 void MacroAssembler::encode_heap_oop(Register dst, Register src) { 2735 // This code pattern is matched in NativeIntruction::skip_encode_heap_oop. 2736 // Update it at modifications. 2737 assert (UseCompressedOops, "must be compressed"); 2738 assert (Universe::heap() != NULL, "java heap should be initialized"); 2739 #ifdef ASSERT 2740 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2741 #endif 2742 verify_oop(src); 2743 if (Universe::narrow_oop_base() == NULL) { 2744 if (Universe::narrow_oop_shift() != 0) { 2745 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2746 _lsr(dst, src, Universe::narrow_oop_shift()); 2747 } else if (dst != src) { 2748 mov(dst, src); 2749 } 2750 } else { 2751 tst(src, src); 2752 csel(dst, Rheap_base, src, eq); 2753 sub(dst, dst, Rheap_base); 2754 if (Universe::narrow_oop_shift() != 0) { 2755 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2756 _lsr(dst, dst, Universe::narrow_oop_shift()); 2757 } 2758 } 2759 } 2760 2761 // Same algorithm as oop.inline.hpp decode_heap_oop. 2762 void MacroAssembler::decode_heap_oop(Register dst, Register src) { 2763 #ifdef ASSERT 2764 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2765 #endif 2766 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2767 if (Universe::narrow_oop_base() != NULL) { 2768 tst(src, src); 2769 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2770 csel(dst, dst, ZR, ne); 2771 } else { 2772 _lsl(dst, src, Universe::narrow_oop_shift()); 2773 } 2774 verify_oop(dst); 2775 } 2776 2777 #ifdef COMPILER2 2778 // Algorithm must match oop.inline.hpp encode_heap_oop. 2779 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule 2780 // must be changed. 
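// Unlike encode_heap_oop above, the not-null variant can omit the null check (the
// tst/csel pair): with a non-NULL narrow oop base it is just a sub from Rheap_base,
// optionally followed by a shift. None of these instructions set flags, which is what
// lets it preserve the condition codes as required above.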
2781 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2782 assert (UseCompressedOops, "must be compressed"); 2783 assert (Universe::heap() != NULL, "java heap should be initialized"); 2784 #ifdef ASSERT 2785 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2786 #endif 2787 verify_oop(src); 2788 if (Universe::narrow_oop_base() == NULL) { 2789 if (Universe::narrow_oop_shift() != 0) { 2790 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2791 _lsr(dst, src, Universe::narrow_oop_shift()); 2792 } else if (dst != src) { 2793 mov(dst, src); 2794 } 2795 } else { 2796 sub(dst, src, Rheap_base); 2797 if (Universe::narrow_oop_shift() != 0) { 2798 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2799 _lsr(dst, dst, Universe::narrow_oop_shift()); 2800 } 2801 } 2802 } 2803 2804 // Same algorithm as oops.inline.hpp decode_heap_oop. 2805 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule 2806 // must be changed. 2807 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2808 #ifdef ASSERT 2809 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2810 #endif 2811 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2812 if (Universe::narrow_oop_base() != NULL) { 2813 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2814 } else { 2815 _lsl(dst, src, Universe::narrow_oop_shift()); 2816 } 2817 verify_oop(dst); 2818 } 2819 2820 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 2821 assert(UseCompressedClassPointers, "should only be used for compressed header"); 2822 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2823 int klass_index = oop_recorder()->find_index(k); 2824 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 2825 2826 // Relocation with special format (see relocInfo_arm.hpp). 2827 relocate(rspec); 2828 narrowKlass encoded_k = Klass::encode_klass(k); 2829 movz(dst, encoded_k & 0xffff, 0); 2830 movk(dst, (encoded_k >> 16) & 0xffff, 16); 2831 } 2832 2833 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 2834 assert(UseCompressedOops, "should only be used for compressed header"); 2835 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2836 int oop_index = oop_recorder()->find_index(obj); 2837 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2838 2839 relocate(rspec); 2840 movz(dst, 0xffff, 0); 2841 movk(dst, 0xffff, 16); 2842 } 2843 2844 #endif // COMPILER2 2845 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2846 // must be changed. 2847 void MacroAssembler::encode_klass_not_null(Register r) { 2848 if (Universe::narrow_klass_base() != NULL) { 2849 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2850 assert(r != Rheap_base, "Encoding a klass in Rheap_base"); 2851 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2852 sub(r, r, Rheap_base); 2853 } 2854 if (Universe::narrow_klass_shift() != 0) { 2855 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2856 _lsr(r, r, Universe::narrow_klass_shift()); 2857 } 2858 if (Universe::narrow_klass_base() != NULL) { 2859 reinit_heapbase(); 2860 } 2861 } 2862 2863 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2864 // must be changed. 
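// Two-register variant: unlike the single-register form above it does not need to borrow
// Rheap_base as a scratch register (dst itself temporarily holds the narrow_klass_base),
// so no reinit_heapbase() is needed afterwards.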
2865 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 2866 if (dst == src) { 2867 encode_klass_not_null(src); 2868 return; 2869 } 2870 if (Universe::narrow_klass_base() != NULL) { 2871 mov_slow(dst, (int64_t)Universe::narrow_klass_base()); 2872 sub(dst, src, dst); 2873 if (Universe::narrow_klass_shift() != 0) { 2874 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2875 _lsr(dst, dst, Universe::narrow_klass_shift()); 2876 } 2877 } else { 2878 if (Universe::narrow_klass_shift() != 0) { 2879 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2880 _lsr(dst, src, Universe::narrow_klass_shift()); 2881 } else { 2882 mov(dst, src); 2883 } 2884 } 2885 } 2886 2887 // Function instr_count_for_decode_klass_not_null() counts the instructions 2888 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 2889 // when (Universe::heap() != NULL). Hence, if the instructions they 2890 // generate change, then this method needs to be updated. 2891 int MacroAssembler::instr_count_for_decode_klass_not_null() { 2892 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 2893 assert(Universe::heap() != NULL, "java heap should be initialized"); 2894 if (Universe::narrow_klass_base() != NULL) { 2895 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 2896 1 + // add 2897 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 2898 } else { 2899 if (Universe::narrow_klass_shift() != 0) { 2900 return 1; 2901 } 2902 } 2903 return 0; 2904 } 2905 2906 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2907 // must be changed. 2908 void MacroAssembler::decode_klass_not_null(Register r) { 2909 int off = offset(); 2910 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2911 assert(Universe::heap() != NULL, "java heap should be initialized"); 2912 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 2913 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 2914 // Also do not verify_oop as this is called by verify_oop. 2915 if (Universe::narrow_klass_base() != NULL) { 2916 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2917 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2918 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); 2919 reinit_heapbase(); 2920 } else { 2921 if (Universe::narrow_klass_shift() != 0) { 2922 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2923 _lsl(r, r, Universe::narrow_klass_shift()); 2924 } 2925 } 2926 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); 2927 } 2928 2929 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2930 // must be changed. 2931 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 2932 if (src == dst) { 2933 decode_klass_not_null(src); 2934 return; 2935 } 2936 2937 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2938 assert(Universe::heap() != NULL, "java heap should be initialized"); 2939 assert(src != Rheap_base, "Decoding a klass in Rheap_base"); 2940 assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); 2941 // Also do not verify_oop as this is called by verify_oop. 
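// With a non-NULL narrow_klass_base the decode below is dst = base + (src << shift),
// using dst as the scratch for the base so that Rheap_base does not have to be reloaded.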
2942 if (Universe::narrow_klass_base() != NULL) { 2943 mov_slow(dst, Universe::narrow_klass_base()); 2944 add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); 2945 } else { 2946 _lsl(dst, src, Universe::narrow_klass_shift()); 2947 } 2948 } 2949 2950 2951 void MacroAssembler::reinit_heapbase() { 2952 if (UseCompressedOops || UseCompressedClassPointers) { 2953 if (Universe::heap() != NULL) { 2954 mov_slow(Rheap_base, Universe::narrow_ptrs_base()); 2955 } else { 2956 ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); 2957 } 2958 } 2959 } 2960 2961 #ifdef ASSERT 2962 void MacroAssembler::verify_heapbase(const char* msg) { 2963 // This code pattern is matched in NativeIntruction::skip_verify_heapbase. 2964 // Update it at modifications. 2965 assert (UseCompressedOops, "should be compressed"); 2966 assert (Universe::heap() != NULL, "java heap should be initialized"); 2967 if (CheckCompressedOops) { 2968 Label ok; 2969 str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2970 raw_push(Rtemp, ZR); 2971 mrs(Rtemp, Assembler::SysReg_NZCV); 2972 str(Rtemp, Address(SP, 1 * wordSize)); 2973 mov_slow(Rtemp, Universe::narrow_ptrs_base()); 2974 cmp(Rheap_base, Rtemp); 2975 b(ok, eq); 2976 stop(msg); 2977 bind(ok); 2978 ldr(Rtemp, Address(SP, 1 * wordSize)); 2979 msr(Assembler::SysReg_NZCV, Rtemp); 2980 raw_pop(Rtemp, ZR); 2981 str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2982 } 2983 } 2984 #endif // ASSERT 2985 2986 #endif // AARCH64 2987 2988 #ifdef COMPILER2 2989 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 2990 { 2991 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 2992 2993 Register Rmark = Rscratch2; 2994 2995 assert(Roop != Rscratch, ""); 2996 assert(Roop != Rmark, ""); 2997 assert(Rbox != Rscratch, ""); 2998 assert(Rbox != Rmark, ""); 2999 3000 Label fast_lock, done; 3001 3002 if (UseBiasedLocking && !UseOptoBiasInlining) { 3003 Label failed; 3004 #ifdef AARCH64 3005 biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); 3006 #else 3007 biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); 3008 #endif 3009 bind(failed); 3010 } 3011 3012 ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); 3013 tst(Rmark, markOopDesc::unlocked_value); 3014 b(fast_lock, ne); 3015 3016 // Check for recursive lock 3017 // See comments in InterpreterMacroAssembler::lock_object for 3018 // explanations on the fast recursive locking check. 
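// In short, restating the code below: if the displaced mark is a pointer into the current
// thread's stack (mark - SP is small and suitably aligned after masking), the lock is
// already held recursively by this thread; a zero displaced header is then stored into
// the BasicLock and the routine exits without needing the CAS.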
3019 #ifdef AARCH64 3020 intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); 3021 Assembler::LogicalImmediate imm(mask, false); 3022 mov(Rscratch, SP); 3023 sub(Rscratch, Rmark, Rscratch); 3024 ands(Rscratch, Rscratch, imm); 3025 b(done, ne); // exit with failure 3026 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero 3027 b(done); 3028 3029 #else 3030 // -1- test low 2 bits 3031 movs(Rscratch, AsmOperand(Rmark, lsl, 30)); 3032 // -2- test (hdr - SP) if the low two bits are 0 3033 sub(Rscratch, Rmark, SP, eq); 3034 movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); 3035 // If still 'eq' then recursive locking OK 3036 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero 3037 b(done); 3038 #endif 3039 3040 bind(fast_lock); 3041 str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3042 3043 bool allow_fallthrough_on_failure = true; 3044 bool one_shot = true; 3045 cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3046 3047 bind(done); 3048 3049 } 3050 3051 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3052 { 3053 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3054 3055 Register Rmark = Rscratch2; 3056 3057 assert(Roop != Rscratch, ""); 3058 assert(Roop != Rmark, ""); 3059 assert(Rbox != Rscratch, ""); 3060 assert(Rbox != Rmark, ""); 3061 3062 Label done; 3063 3064 if (UseBiasedLocking && !UseOptoBiasInlining) { 3065 biased_locking_exit(Roop, Rscratch, done); 3066 } 3067 3068 ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3069 // If hdr is NULL, we've got recursive locking and there's nothing more to do 3070 cmp(Rmark, 0); 3071 b(done, eq); 3072 3073 // Restore the object header 3074 bool allow_fallthrough_on_failure = true; 3075 bool one_shot = true; 3076 cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3077 3078 bind(done); 3079 3080 } 3081 #endif // COMPILER2