1 /* 2 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "asm/macroAssembler.hpp" 29 #include "ci/ciEnv.hpp" 30 #include "code/nativeInst.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/barrierSet.hpp" 33 #include "gc/shared/cardTable.hpp" 34 #include "gc/shared/barrierSetAssembler.hpp" 35 #include "gc/shared/cardTableBarrierSet.hpp" 36 #include "gc/shared/collectedHeap.inline.hpp" 37 #include "interpreter/interpreter.hpp" 38 #include "memory/resourceArea.hpp" 39 #include "oops/klass.inline.hpp" 40 #include "prims/methodHandles.hpp" 41 #include "runtime/biasedLocking.hpp" 42 #include "runtime/interfaceSupport.inline.hpp" 43 #include "runtime/objectMonitor.hpp" 44 #include "runtime/os.hpp" 45 #include "runtime/sharedRuntime.hpp" 46 #include "runtime/stubRoutines.hpp" 47 #include "utilities/macros.hpp" 48 49 // Implementation of AddressLiteral 50 51 void AddressLiteral::set_rspec(relocInfo::relocType rtype) { 52 switch (rtype) { 53 case relocInfo::oop_type: 54 // Oops are a special case. Normally they would be their own section 55 // but in cases like icBuffer they are literals in the code stream that 56 // we don't have a section for. We use none so that we get a literal address 57 // which is always patchable. 58 break; 59 case relocInfo::external_word_type: 60 _rspec = external_word_Relocation::spec(_target); 61 break; 62 case relocInfo::internal_word_type: 63 _rspec = internal_word_Relocation::spec(_target); 64 break; 65 case relocInfo::opt_virtual_call_type: 66 _rspec = opt_virtual_call_Relocation::spec(); 67 break; 68 case relocInfo::static_call_type: 69 _rspec = static_call_Relocation::spec(); 70 break; 71 case relocInfo::runtime_call_type: 72 _rspec = runtime_call_Relocation::spec(); 73 break; 74 case relocInfo::poll_type: 75 case relocInfo::poll_return_type: 76 _rspec = Relocation::spec_simple(rtype); 77 break; 78 case relocInfo::none: 79 break; 80 default: 81 ShouldNotReachHere(); 82 break; 83 } 84 } 85 86 // Initially added to the Assembler interface as a pure virtual: 87 // RegisterConstant delayed_value(..) 
88 // for: 89 // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) 90 // this was subsequently modified to its present name and return type 91 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 92 Register tmp, 93 int offset) { 94 ShouldNotReachHere(); 95 return RegisterOrConstant(-1); 96 } 97 98 99 #ifdef AARCH64 100 // Note: ARM32 version is OS dependent 101 void MacroAssembler::breakpoint(AsmCondition cond) { 102 if (cond == al) { 103 brk(); 104 } else { 105 Label L; 106 b(L, inverse(cond)); 107 brk(); 108 bind(L); 109 } 110 } 111 #endif // AARCH64 112 113 114 // virtual method calling 115 void MacroAssembler::lookup_virtual_method(Register recv_klass, 116 Register vtable_index, 117 Register method_result) { 118 const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes(); 119 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 120 add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); 121 ldr(method_result, Address(recv_klass, base_offset)); 122 } 123 124 125 // Simplified, combined version, good for typical uses. 126 // Falls through on failure. 127 void MacroAssembler::check_klass_subtype(Register sub_klass, 128 Register super_klass, 129 Register temp_reg, 130 Register temp_reg2, 131 Register temp_reg3, 132 Label& L_success) { 133 Label L_failure; 134 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); 135 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); 136 bind(L_failure); 137 }; 138 139 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 140 Register super_klass, 141 Register temp_reg, 142 Register temp_reg2, 143 Label* L_success, 144 Label* L_failure, 145 Label* L_slow_path) { 146 147 assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg); 148 const Register super_check_offset = temp_reg2; 149 150 Label L_fallthrough; 151 int label_nulls = 0; 152 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 153 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 154 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 155 assert(label_nulls <= 1, "at most one NULL in the batch"); 156 157 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 158 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 159 Address super_check_offset_addr(super_klass, sco_offset); 160 161 // If the pointers are equal, we are done (e.g., String[] elements). 162 // This self-check enables sharing of secondary supertype arrays among 163 // non-primary types such as array-of-interface. Otherwise, each such 164 // type would need its own customized SSA. 165 // We move this check to the front of the fast path because many 166 // type checks are in fact trivially successful in this manner, 167 // so we get a nicely predicted branch right at the start of the check. 168 cmp(sub_klass, super_klass); 169 b(*L_success, eq); 170 171 // Check the supertype display: 172 ldr_u32(super_check_offset, super_check_offset_addr); 173 174 Address super_check_addr(sub_klass, super_check_offset); 175 ldr(temp_reg, super_check_addr); 176 cmp(super_klass, temp_reg); // load displayed supertype 177 178 // This check has worked decisively for primary supers. 179 // Secondary supers are sought in the super_cache ('super_cache_addr'). 
180 // (Secondary supers are interfaces and very deeply nested subtypes.) 181 // This works in the same check above because of a tricky aliasing 182 // between the super_cache and the primary super display elements. 183 // (The 'super_check_addr' can address either, as the case requires.) 184 // Note that the cache is updated below if it does not help us find 185 // what we need immediately. 186 // So if it was a primary super, we can just fail immediately. 187 // Otherwise, it's the slow path for us (no success at this point). 188 189 b(*L_success, eq); 190 cmp_32(super_check_offset, sc_offset); 191 if (L_failure == &L_fallthrough) { 192 b(*L_slow_path, eq); 193 } else { 194 b(*L_failure, ne); 195 if (L_slow_path != &L_fallthrough) { 196 b(*L_slow_path); 197 } 198 } 199 200 bind(L_fallthrough); 201 } 202 203 204 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 205 Register super_klass, 206 Register temp_reg, 207 Register temp2_reg, 208 Register temp3_reg, 209 Label* L_success, 210 Label* L_failure, 211 bool set_cond_codes) { 212 #ifdef AARCH64 213 NOT_IMPLEMENTED(); 214 #else 215 // Note: if used by code that expects a register to be 0 on success, 216 // this register must be temp_reg and set_cond_codes must be true 217 218 Register saved_reg = noreg; 219 220 // get additional tmp registers 221 if (temp3_reg == noreg) { 222 saved_reg = temp3_reg = LR; 223 push(saved_reg); 224 } 225 226 assert(temp2_reg != noreg, "need all the temporary registers"); 227 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg); 228 229 Register cmp_temp = temp_reg; 230 Register scan_temp = temp3_reg; 231 Register count_temp = temp2_reg; 232 233 Label L_fallthrough; 234 int label_nulls = 0; 235 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 236 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 237 assert(label_nulls <= 1, "at most one NULL in the batch"); 238 239 // a couple of useful fields in sub_klass: 240 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 241 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 242 Address secondary_supers_addr(sub_klass, ss_offset); 243 Address super_cache_addr( sub_klass, sc_offset); 244 245 #ifndef PRODUCT 246 inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp); 247 #endif 248 249 // We will consult the secondary-super array. 250 ldr(scan_temp, Address(sub_klass, ss_offset)); 251 252 assert(! UseCompressedOops, "search_key must be the compressed super_klass"); 253 // else search_key is the 254 Register search_key = super_klass; 255 256 // Load the array length. 257 ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes())); 258 add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); 259 260 add(count_temp, count_temp, 1); 261 262 Label L_loop, L_setnz_and_fail, L_fail; 263 264 // Top of search loop 265 bind(L_loop); 266 // Notes: 267 // scan_temp starts at the array elements 268 // count_temp is 1+size 269 subs(count_temp, count_temp, 1); 270 if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) { 271 // direct jump to L_failure if failed and no cleanup needed 272 b(*L_failure, eq); // not found and 273 } else { 274 b(L_fail, eq); // not found in the array 275 } 276 277 // Load next super to check 278 // In the array of super classes elements are pointer sized. 
279 int element_size = wordSize; 280 ldr(cmp_temp, Address(scan_temp, element_size, post_indexed)); 281 282 // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list 283 subs(cmp_temp, cmp_temp, search_key); 284 285 // A miss means we are NOT a subtype and need to keep looping 286 b(L_loop, ne); 287 288 // Falling out the bottom means we found a hit; we ARE a subtype 289 290 // Note: temp_reg/cmp_temp is already 0 and flag Z is set 291 292 // Success. Cache the super we found and proceed in triumph. 293 str(super_klass, Address(sub_klass, sc_offset)); 294 295 if (saved_reg != noreg) { 296 // Return success 297 pop(saved_reg); 298 } 299 300 b(*L_success); 301 302 bind(L_fail); 303 // Note1: check "b(*L_failure, eq)" above if adding extra instructions here 304 if (set_cond_codes) { 305 movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed 306 } 307 if (saved_reg != noreg) { 308 pop(saved_reg); 309 } 310 if (L_failure != &L_fallthrough) { 311 b(*L_failure); 312 } 313 314 bind(L_fallthrough); 315 #endif 316 } 317 318 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same. 319 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) { 320 assert_different_registers(params_base, params_count); 321 add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize)); 322 return Address(tmp, -Interpreter::stackElementSize); 323 } 324 325 326 void MacroAssembler::align(int modulus) { 327 while (offset() % modulus != 0) { 328 nop(); 329 } 330 } 331 332 int MacroAssembler::set_last_Java_frame(Register last_java_sp, 333 Register last_java_fp, 334 bool save_last_java_pc, 335 Register tmp) { 336 int pc_offset; 337 if (last_java_fp != noreg) { 338 // optional 339 str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset())); 340 _fp_saved = true; 341 } else { 342 _fp_saved = false; 343 } 344 if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM 345 #ifdef AARCH64 346 pc_offset = mov_pc_to(tmp); 347 str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset())); 348 #else 349 str(PC, Address(Rthread, JavaThread::last_Java_pc_offset())); 350 pc_offset = offset() + VM_Version::stored_pc_adjustment(); 351 #endif 352 _pc_saved = true; 353 } else { 354 _pc_saved = false; 355 pc_offset = -1; 356 } 357 // According to comment in javaFrameAnchorm SP must be saved last, so that other 358 // entries are valid when SP is set. 359 360 // However, this is probably not a strong constrainst since for instance PC is 361 // sometimes read from the stack at SP... but is pushed later (by the call). Hence, 362 // we now write the fields in the expected order but we have not added a StoreStore 363 // barrier. 364 365 // XXX: if the ordering is really important, PC should always be saved (without forgetting 366 // to update oop_map offsets) and a StoreStore barrier might be needed. 
367 368 if (last_java_sp == noreg) { 369 last_java_sp = SP; // always saved 370 } 371 #ifdef AARCH64 372 if (last_java_sp == SP) { 373 mov(tmp, SP); 374 str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); 375 } else { 376 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 377 } 378 #else 379 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 380 #endif 381 382 return pc_offset; // for oopmaps 383 } 384 385 void MacroAssembler::reset_last_Java_frame(Register tmp) { 386 const Register Rzero = zero_register(tmp); 387 str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); 388 if (_fp_saved) { 389 str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset())); 390 } 391 if (_pc_saved) { 392 str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset())); 393 } 394 } 395 396 397 // Implementation of call_VM versions 398 399 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) { 400 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 401 assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); 402 403 #ifndef AARCH64 404 // Safer to save R9 here since callers may have been written 405 // assuming R9 survives. This is suboptimal but is not worth 406 // optimizing for the few platforms where R9 is scratched. 407 push(RegisterSet(R4) | R9ifScratched); 408 mov(R4, SP); 409 bic(SP, SP, StackAlignmentInBytes - 1); 410 #endif // AARCH64 411 call(entry_point, relocInfo::runtime_call_type); 412 #ifndef AARCH64 413 mov(SP, R4); 414 pop(RegisterSet(R4) | R9ifScratched); 415 #endif // AARCH64 416 } 417 418 419 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 420 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 421 assert(number_of_arguments <= 3, "cannot have more than 3 arguments"); 422 423 const Register tmp = Rtemp; 424 assert_different_registers(oop_result, tmp); 425 426 set_last_Java_frame(SP, FP, true, tmp); 427 428 #ifdef ASSERT 429 AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); 430 #endif // ASSERT 431 432 #ifndef AARCH64 433 #if R9_IS_SCRATCHED 434 // Safer to save R9 here since callers may have been written 435 // assuming R9 survives. This is suboptimal but is not worth 436 // optimizing for the few platforms where R9 is scratched. 437 438 // Note: cannot save R9 above the saved SP (some calls expect for 439 // instance the Java stack top at the saved SP) 440 // => once saved (with set_last_Java_frame), decrease SP before rounding to 441 // ensure the slot at SP will be free for R9). 
442 sub(SP, SP, 4); 443 bic(SP, SP, StackAlignmentInBytes - 1); 444 str(R9, Address(SP, 0)); 445 #else 446 bic(SP, SP, StackAlignmentInBytes - 1); 447 #endif // R9_IS_SCRATCHED 448 #endif 449 450 mov(R0, Rthread); 451 call(entry_point, relocInfo::runtime_call_type); 452 453 #ifndef AARCH64 454 #if R9_IS_SCRATCHED 455 ldr(R9, Address(SP, 0)); 456 #endif 457 ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); 458 #endif 459 460 reset_last_Java_frame(tmp); 461 462 // C++ interp handles this in the interpreter 463 check_and_handle_popframe(); 464 check_and_handle_earlyret(); 465 466 if (check_exceptions) { 467 // check for pending exceptions 468 ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); 469 #ifdef AARCH64 470 Label L; 471 cbz(tmp, L); 472 mov_pc_to(Rexception_pc); 473 b(StubRoutines::forward_exception_entry()); 474 bind(L); 475 #else 476 cmp(tmp, 0); 477 mov(Rexception_pc, PC, ne); 478 b(StubRoutines::forward_exception_entry(), ne); 479 #endif // AARCH64 480 } 481 482 // get oop result if there is one and reset the value in the thread 483 if (oop_result->is_valid()) { 484 get_vm_result(oop_result, tmp); 485 } 486 } 487 488 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 489 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 490 } 491 492 493 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 494 assert (arg_1 == R1, "fixed register for arg_1"); 495 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 496 } 497 498 499 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 500 assert (arg_1 == R1, "fixed register for arg_1"); 501 assert (arg_2 == R2, "fixed register for arg_2"); 502 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 503 } 504 505 506 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 507 assert (arg_1 == R1, "fixed register for arg_1"); 508 assert (arg_2 == R2, "fixed register for arg_2"); 509 assert (arg_3 == R3, "fixed register for arg_3"); 510 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 511 } 512 513 514 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { 515 // Not used on ARM 516 Unimplemented(); 517 } 518 519 520 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 521 // Not used on ARM 522 Unimplemented(); 523 } 524 525 526 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 527 // Not used on ARM 528 Unimplemented(); 529 } 530 531 532 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 533 // Not used on ARM 534 Unimplemented(); 535 } 536 537 // Raw call, without saving/restoring registers, exception handling, etc. 538 // Mainly used from various stubs. 
539 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) { 540 const Register tmp = Rtemp; // Rtemp free since scratched by call 541 set_last_Java_frame(SP, FP, true, tmp); 542 #if R9_IS_SCRATCHED 543 if (save_R9_if_scratched) { 544 // Note: Saving also R10 for alignment. 545 push(RegisterSet(R9, R10)); 546 } 547 #endif 548 mov(R0, Rthread); 549 call(entry_point, relocInfo::runtime_call_type); 550 #if R9_IS_SCRATCHED 551 if (save_R9_if_scratched) { 552 pop(RegisterSet(R9, R10)); 553 } 554 #endif 555 reset_last_Java_frame(tmp); 556 } 557 558 void MacroAssembler::call_VM_leaf(address entry_point) { 559 call_VM_leaf_helper(entry_point, 0); 560 } 561 562 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 563 assert (arg_1 == R0, "fixed register for arg_1"); 564 call_VM_leaf_helper(entry_point, 1); 565 } 566 567 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 568 assert (arg_1 == R0, "fixed register for arg_1"); 569 assert (arg_2 == R1, "fixed register for arg_2"); 570 call_VM_leaf_helper(entry_point, 2); 571 } 572 573 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 574 assert (arg_1 == R0, "fixed register for arg_1"); 575 assert (arg_2 == R1, "fixed register for arg_2"); 576 assert (arg_3 == R2, "fixed register for arg_3"); 577 call_VM_leaf_helper(entry_point, 3); 578 } 579 580 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) { 581 assert (arg_1 == R0, "fixed register for arg_1"); 582 assert (arg_2 == R1, "fixed register for arg_2"); 583 assert (arg_3 == R2, "fixed register for arg_3"); 584 assert (arg_4 == R3, "fixed register for arg_4"); 585 call_VM_leaf_helper(entry_point, 4); 586 } 587 588 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) { 589 assert_different_registers(oop_result, tmp); 590 ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); 591 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); 592 verify_oop(oop_result); 593 } 594 595 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) { 596 assert_different_registers(metadata_result, tmp); 597 ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); 598 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); 599 } 600 601 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) { 602 if (arg2.is_register()) { 603 add(dst, arg1, arg2.as_register()); 604 } else { 605 add(dst, arg1, arg2.as_constant()); 606 } 607 } 608 609 void MacroAssembler::add_slow(Register rd, Register rn, int c) { 610 #ifdef AARCH64 611 if (c == 0) { 612 if (rd != rn) { 613 mov(rd, rn); 614 } 615 return; 616 } 617 if (c < 0) { 618 sub_slow(rd, rn, -c); 619 return; 620 } 621 if (c > right_n_bits(24)) { 622 guarantee(rd != rn, "no large add_slow with only one register"); 623 mov_slow(rd, c); 624 add(rd, rn, rd); 625 } else { 626 int lo = c & right_n_bits(12); 627 int hi = (c >> 12) & right_n_bits(12); 628 if (lo != 0) { 629 add(rd, rn, lo, lsl0); 630 } 631 if (hi != 0) { 632 add(rd, (lo == 0) ? 
rn : rd, hi, lsl12); 633 } 634 } 635 #else 636 // This function is used in compiler for handling large frame offsets 637 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 638 return sub(rd, rn, (-c)); 639 } 640 int low = c & 0x3fc; 641 if (low != 0) { 642 add(rd, rn, low); 643 rn = rd; 644 } 645 if (c & ~0x3fc) { 646 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c); 647 add(rd, rn, c & ~0x3fc); 648 } else if (rd != rn) { 649 assert(c == 0, ""); 650 mov(rd, rn); // need to generate at least one move! 651 } 652 #endif // AARCH64 653 } 654 655 void MacroAssembler::sub_slow(Register rd, Register rn, int c) { 656 #ifdef AARCH64 657 if (c <= 0) { 658 add_slow(rd, rn, -c); 659 return; 660 } 661 if (c > right_n_bits(24)) { 662 guarantee(rd != rn, "no large sub_slow with only one register"); 663 mov_slow(rd, c); 664 sub(rd, rn, rd); 665 } else { 666 int lo = c & right_n_bits(12); 667 int hi = (c >> 12) & right_n_bits(12); 668 if (lo != 0) { 669 sub(rd, rn, lo, lsl0); 670 } 671 if (hi != 0) { 672 sub(rd, (lo == 0) ? rn : rd, hi, lsl12); 673 } 674 } 675 #else 676 // This function is used in compiler for handling large frame offsets 677 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 678 return add(rd, rn, (-c)); 679 } 680 int low = c & 0x3fc; 681 if (low != 0) { 682 sub(rd, rn, low); 683 rn = rd; 684 } 685 if (c & ~0x3fc) { 686 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c); 687 sub(rd, rn, c & ~0x3fc); 688 } else if (rd != rn) { 689 assert(c == 0, ""); 690 mov(rd, rn); // need to generate at least one move! 691 } 692 #endif // AARCH64 693 } 694 695 void MacroAssembler::mov_slow(Register rd, address addr) { 696 // do *not* call the non relocated mov_related_address 697 mov_slow(rd, (intptr_t)addr); 698 } 699 700 void MacroAssembler::mov_slow(Register rd, const char *str) { 701 mov_slow(rd, (intptr_t)str); 702 } 703 704 #ifdef AARCH64 705 706 // Common code for mov_slow and instr_count_for_mov_slow. 707 // Returns number of instructions of mov_slow pattern, 708 // generating it if non-null MacroAssembler is given. 709 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) { 710 // This code pattern is matched in NativeIntruction::is_mov_slow. 711 // Update it at modifications. 
712 713 const intx mask = right_n_bits(16); 714 // 1 movz instruction 715 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 716 if ((c & ~(mask << base_shift)) == 0) { 717 if (masm != NULL) { 718 masm->movz(rd, ((uintx)c) >> base_shift, base_shift); 719 } 720 return 1; 721 } 722 } 723 // 1 movn instruction 724 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 725 if (((~c) & ~(mask << base_shift)) == 0) { 726 if (masm != NULL) { 727 masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); 728 } 729 return 1; 730 } 731 } 732 // 1 orr instruction 733 { 734 LogicalImmediate imm(c, false); 735 if (imm.is_encoded()) { 736 if (masm != NULL) { 737 masm->orr(rd, ZR, imm); 738 } 739 return 1; 740 } 741 } 742 // 1 movz/movn + up to 3 movk instructions 743 int zeroes = 0; 744 int ones = 0; 745 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 746 int part = (c >> base_shift) & mask; 747 if (part == 0) { 748 ++zeroes; 749 } else if (part == mask) { 750 ++ones; 751 } 752 } 753 int def_bits = 0; 754 if (ones > zeroes) { 755 def_bits = mask; 756 } 757 int inst_count = 0; 758 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 759 int part = (c >> base_shift) & mask; 760 if (part != def_bits) { 761 if (masm != NULL) { 762 if (inst_count > 0) { 763 masm->movk(rd, part, base_shift); 764 } else { 765 if (def_bits == 0) { 766 masm->movz(rd, part, base_shift); 767 } else { 768 masm->movn(rd, ~part & mask, base_shift); 769 } 770 } 771 } 772 inst_count++; 773 } 774 } 775 assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); 776 return inst_count; 777 } 778 779 void MacroAssembler::mov_slow(Register rd, intptr_t c) { 780 #ifdef ASSERT 781 int off = offset(); 782 #endif 783 (void) mov_slow_helper(rd, c, this); 784 assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); 785 } 786 787 // Counts instructions generated by mov_slow(rd, c). 
788 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { 789 return mov_slow_helper(noreg, c, NULL); 790 } 791 792 int MacroAssembler::instr_count_for_mov_slow(address c) { 793 return mov_slow_helper(noreg, (intptr_t)c, NULL); 794 } 795 796 #else 797 798 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { 799 if (AsmOperand::is_rotated_imm(c)) { 800 mov(rd, c, cond); 801 } else if (AsmOperand::is_rotated_imm(~c)) { 802 mvn(rd, ~c, cond); 803 } else if (VM_Version::supports_movw()) { 804 movw(rd, c & 0xffff, cond); 805 if ((unsigned int)c >> 16) { 806 movt(rd, (unsigned int)c >> 16, cond); 807 } 808 } else { 809 // Find first non-zero bit 810 int shift = 0; 811 while ((c & (3 << shift)) == 0) { 812 shift += 2; 813 } 814 // Put the least significant part of the constant 815 int mask = 0xff << shift; 816 mov(rd, c & mask, cond); 817 // Add up to 3 other parts of the constant; 818 // each of them can be represented as rotated_imm 819 if (c & (mask << 8)) { 820 orr(rd, rd, c & (mask << 8), cond); 821 } 822 if (c & (mask << 16)) { 823 orr(rd, rd, c & (mask << 16), cond); 824 } 825 if (c & (mask << 24)) { 826 orr(rd, rd, c & (mask << 24), cond); 827 } 828 } 829 } 830 831 #endif // AARCH64 832 833 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, 834 #ifdef AARCH64 835 bool patchable 836 #else 837 AsmCondition cond 838 #endif 839 ) { 840 841 if (o == NULL) { 842 #ifdef AARCH64 843 if (patchable) { 844 nop(); 845 } 846 mov(rd, ZR); 847 #else 848 mov(rd, 0, cond); 849 #endif 850 return; 851 } 852 853 if (oop_index == 0) { 854 oop_index = oop_recorder()->allocate_oop_index(o); 855 } 856 relocate(oop_Relocation::spec(oop_index)); 857 858 #ifdef AARCH64 859 if (patchable) { 860 nop(); 861 } 862 ldr(rd, pc()); 863 #else 864 if (VM_Version::supports_movw()) { 865 movw(rd, 0, cond); 866 movt(rd, 0, cond); 867 } else { 868 ldr(rd, Address(PC), cond); 869 // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 870 nop(); 871 } 872 #endif 873 } 874 875 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { 876 if (o == NULL) { 877 #ifdef AARCH64 878 if (patchable) { 879 nop(); 880 } 881 #endif 882 mov(rd, 0); 883 return; 884 } 885 886 if (metadata_index == 0) { 887 metadata_index = oop_recorder()->allocate_metadata_index(o); 888 } 889 relocate(metadata_Relocation::spec(metadata_index)); 890 891 #ifdef AARCH64 892 if (patchable) { 893 nop(); 894 } 895 #ifdef COMPILER2 896 if (!patchable && VM_Version::prefer_moves_over_load_literal()) { 897 mov_slow(rd, (address)o); 898 return; 899 } 900 #endif 901 ldr(rd, pc()); 902 #else 903 if (VM_Version::supports_movw()) { 904 movw(rd, ((int)o) & 0xffff); 905 movt(rd, (unsigned int)o >> 16); 906 } else { 907 ldr(rd, Address(PC)); 908 // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). 
909 nop(); 910 } 911 #endif // AARCH64 912 } 913 914 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { 915 Label skip_constant; 916 union { 917 jfloat f; 918 jint i; 919 } accessor; 920 accessor.f = c; 921 922 #ifdef AARCH64 923 // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow 924 Label L; 925 ldr_s(fd, target(L)); 926 b(skip_constant); 927 bind(L); 928 emit_int32(accessor.i); 929 bind(skip_constant); 930 #else 931 flds(fd, Address(PC), cond); 932 b(skip_constant); 933 emit_int32(accessor.i); 934 bind(skip_constant); 935 #endif // AARCH64 936 } 937 938 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { 939 Label skip_constant; 940 union { 941 jdouble d; 942 jint i[2]; 943 } accessor; 944 accessor.d = c; 945 946 #ifdef AARCH64 947 // TODO-AARCH64 - try to optimize loading of double constants with fmov 948 Label L; 949 ldr_d(fd, target(L)); 950 b(skip_constant); 951 align(wordSize); 952 bind(L); 953 emit_int32(accessor.i[0]); 954 emit_int32(accessor.i[1]); 955 bind(skip_constant); 956 #else 957 fldd(fd, Address(PC), cond); 958 b(skip_constant); 959 emit_int32(accessor.i[0]); 960 emit_int32(accessor.i[1]); 961 bind(skip_constant); 962 #endif // AARCH64 963 } 964 965 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) { 966 intptr_t addr = (intptr_t) address_of_global; 967 #ifdef AARCH64 968 assert((addr & 0x3) == 0, "address should be aligned"); 969 970 // FIXME: TODO 971 if (false && page_reachable_from_cache(address_of_global)) { 972 assert(false,"TODO: relocate"); 973 //relocate(); 974 adrp(reg, address_of_global); 975 ldrsw(reg, Address(reg, addr & 0xfff)); 976 } else { 977 mov_slow(reg, addr & ~0x3fff); 978 ldrsw(reg, Address(reg, addr & 0x3fff)); 979 } 980 #else 981 mov_slow(reg, addr & ~0xfff); 982 ldr(reg, Address(reg, addr & 0xfff)); 983 #endif 984 } 985 986 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) { 987 #ifdef AARCH64 988 intptr_t addr = (intptr_t) address_of_global; 989 assert ((addr & 0x7) == 0, "address should be aligned"); 990 mov_slow(reg, addr & ~0x7fff); 991 ldr(reg, Address(reg, addr & 0x7fff)); 992 #else 993 ldr_global_s32(reg, address_of_global); 994 #endif 995 } 996 997 void MacroAssembler::ldrb_global(Register reg, address address_of_global) { 998 intptr_t addr = (intptr_t) address_of_global; 999 mov_slow(reg, addr & ~0xfff); 1000 ldrb(reg, Address(reg, addr & 0xfff)); 1001 } 1002 1003 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) { 1004 #ifdef AARCH64 1005 switch (bits) { 1006 case 8: uxtb(rd, rn); break; 1007 case 16: uxth(rd, rn); break; 1008 case 32: mov_w(rd, rn); break; 1009 default: ShouldNotReachHere(); 1010 } 1011 #else 1012 if (bits <= 8) { 1013 andr(rd, rn, (1 << bits) - 1); 1014 } else if (bits >= 24) { 1015 bic(rd, rn, -1 << bits); 1016 } else { 1017 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1018 mov(rd, AsmOperand(rd, lsr, 32 - bits)); 1019 } 1020 #endif 1021 } 1022 1023 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) { 1024 #ifdef AARCH64 1025 switch (bits) { 1026 case 8: sxtb(rd, rn); break; 1027 case 16: sxth(rd, rn); break; 1028 case 32: sxtw(rd, rn); break; 1029 default: ShouldNotReachHere(); 1030 } 1031 #else 1032 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1033 mov(rd, AsmOperand(rd, asr, 32 - bits)); 1034 #endif 1035 } 1036 1037 #ifndef AARCH64 1038 1039 void MacroAssembler::long_move(Register rd_lo, Register rd_hi, 1040 
Register rn_lo, Register rn_hi, 1041 AsmCondition cond) { 1042 if (rd_lo != rn_hi) { 1043 if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } 1044 if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } 1045 } else if (rd_hi != rn_lo) { 1046 if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } 1047 if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } 1048 } else { 1049 eor(rd_lo, rd_hi, rd_lo, cond); 1050 eor(rd_hi, rd_lo, rd_hi, cond); 1051 eor(rd_lo, rd_hi, rd_lo, cond); 1052 } 1053 } 1054 1055 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, 1056 Register rn_lo, Register rn_hi, 1057 AsmShift shift, Register count) { 1058 Register tmp; 1059 if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) { 1060 tmp = rd_lo; 1061 } else { 1062 tmp = rd_hi; 1063 } 1064 assert_different_registers(tmp, count, rn_lo, rn_hi); 1065 1066 subs(tmp, count, 32); 1067 if (shift == lsl) { 1068 assert_different_registers(rd_hi, rn_lo); 1069 assert_different_registers(count, rd_hi); 1070 mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl); 1071 rsb(tmp, count, 32, mi); 1072 if (rd_hi == rn_hi) { 1073 mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi); 1074 orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); 1075 } else { 1076 mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); 1077 orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi); 1078 } 1079 mov(rd_lo, AsmOperand(rn_lo, shift, count)); 1080 } else { 1081 assert_different_registers(rd_lo, rn_hi); 1082 assert_different_registers(rd_lo, count); 1083 mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl); 1084 rsb(tmp, count, 32, mi); 1085 if (rd_lo == rn_lo) { 1086 mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi); 1087 orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); 1088 } else { 1089 mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); 1090 orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi); 1091 } 1092 mov(rd_hi, AsmOperand(rn_hi, shift, count)); 1093 } 1094 } 1095 1096 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, 1097 Register rn_lo, Register rn_hi, 1098 AsmShift shift, int count) { 1099 assert(count != 0 && (count & ~63) == 0, "must be"); 1100 1101 if (shift == lsl) { 1102 assert_different_registers(rd_hi, rn_lo); 1103 if (count >= 32) { 1104 mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32)); 1105 mov(rd_lo, 0); 1106 } else { 1107 mov(rd_hi, AsmOperand(rn_hi, lsl, count)); 1108 orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count)); 1109 mov(rd_lo, AsmOperand(rn_lo, lsl, count)); 1110 } 1111 } else { 1112 assert_different_registers(rd_lo, rn_hi); 1113 if (count >= 32) { 1114 if (count == 32) { 1115 mov(rd_lo, rn_hi); 1116 } else { 1117 mov(rd_lo, AsmOperand(rn_hi, shift, count - 32)); 1118 } 1119 if (shift == asr) { 1120 mov(rd_hi, AsmOperand(rn_hi, asr, 0)); 1121 } else { 1122 mov(rd_hi, 0); 1123 } 1124 } else { 1125 mov(rd_lo, AsmOperand(rn_lo, lsr, count)); 1126 orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count)); 1127 mov(rd_hi, AsmOperand(rn_hi, shift, count)); 1128 } 1129 } 1130 } 1131 #endif // !AARCH64 1132 1133 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 1134 // This code pattern is matched in NativeIntruction::skip_verify_oop. 1135 // Update it at modifications. 
1136 if (!VerifyOops) return; 1137 1138 char buffer[64]; 1139 #ifdef COMPILER1 1140 if (CommentedAssembly) { 1141 snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); 1142 block_comment(buffer); 1143 } 1144 #endif 1145 const char* msg_buffer = NULL; 1146 { 1147 ResourceMark rm; 1148 stringStream ss; 1149 ss.print("%s at offset %d (%s:%d)", s, offset(), file, line); 1150 msg_buffer = code_string(ss.as_string()); 1151 } 1152 1153 save_all_registers(); 1154 1155 if (reg != R2) { 1156 mov(R2, reg); // oop to verify 1157 } 1158 mov(R1, SP); // register save area 1159 1160 Label done; 1161 InlinedString Lmsg(msg_buffer); 1162 ldr_literal(R0, Lmsg); // message 1163 1164 // call indirectly to solve generation ordering problem 1165 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1166 call(Rtemp); 1167 1168 restore_all_registers(); 1169 1170 b(done); 1171 #ifdef COMPILER2 1172 int off = offset(); 1173 #endif 1174 bind_literal(Lmsg); 1175 #ifdef COMPILER2 1176 if (offset() - off == 1 * wordSize) { 1177 // no padding, so insert nop for worst-case sizing 1178 nop(); 1179 } 1180 #endif 1181 bind(done); 1182 } 1183 1184 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 1185 if (!VerifyOops) return; 1186 1187 const char* msg_buffer = NULL; 1188 { 1189 ResourceMark rm; 1190 stringStream ss; 1191 if ((addr.base() == SP) && (addr.index()==noreg)) { 1192 ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s); 1193 } else { 1194 ss.print("verify_oop_addr: %s", s); 1195 } 1196 ss.print(" (%s:%d)", file, line); 1197 msg_buffer = code_string(ss.as_string()); 1198 } 1199 1200 int push_size = save_all_registers(); 1201 1202 if (addr.base() == SP) { 1203 // computes an addr that takes into account the push 1204 if (addr.index() != noreg) { 1205 Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index 1206 add(new_base, SP, push_size); 1207 addr = addr.rebase(new_base); 1208 } else { 1209 addr = addr.plus_disp(push_size); 1210 } 1211 } 1212 1213 ldr(R2, addr); // oop to verify 1214 mov(R1, SP); // register save area 1215 1216 Label done; 1217 InlinedString Lmsg(msg_buffer); 1218 ldr_literal(R0, Lmsg); // message 1219 1220 // call indirectly to solve generation ordering problem 1221 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1222 call(Rtemp); 1223 1224 restore_all_registers(); 1225 1226 b(done); 1227 bind_literal(Lmsg); 1228 bind(done); 1229 } 1230 1231 void MacroAssembler::null_check(Register reg, Register tmp, int offset) { 1232 if (needs_explicit_null_check(offset)) { 1233 #ifdef AARCH64 1234 ldr(ZR, Address(reg)); 1235 #else 1236 assert_different_registers(reg, tmp); 1237 if (tmp == noreg) { 1238 tmp = Rtemp; 1239 assert((! Thread::current()->is_Compiler_thread()) || 1240 (! (ciEnv::current()->task() == NULL)) || 1241 (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), 1242 "Rtemp not available in C2"); // explicit tmp register required 1243 // XXX: could we mark the code buffer as not compatible with C2 ? 1244 } 1245 ldr(tmp, Address(reg)); 1246 #endif 1247 } 1248 } 1249 1250 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
1251 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, 1252 RegisterOrConstant size_expression, Label& slow_case) { 1253 if (!Universe::heap()->supports_inline_contig_alloc()) { 1254 b(slow_case); 1255 return; 1256 } 1257 1258 CollectedHeap* ch = Universe::heap(); 1259 1260 const Register top_addr = tmp1; 1261 const Register heap_end = tmp2; 1262 1263 if (size_expression.is_register()) { 1264 assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); 1265 } else { 1266 assert_different_registers(obj, obj_end, top_addr, heap_end); 1267 } 1268 1269 bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance 1270 if (load_const) { 1271 mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); 1272 } else { 1273 ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); 1274 } 1275 // Calculate new heap_top by adding the size of the object 1276 Label retry; 1277 bind(retry); 1278 1279 #ifdef AARCH64 1280 ldxr(obj, top_addr); 1281 #else 1282 ldr(obj, Address(top_addr)); 1283 #endif // AARCH64 1284 1285 ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); 1286 add_rc(obj_end, obj, size_expression); 1287 // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. 1288 cmp(obj_end, obj); 1289 b(slow_case, lo); 1290 // Update heap_top if allocation succeeded 1291 cmp(obj_end, heap_end); 1292 b(slow_case, hi); 1293 1294 #ifdef AARCH64 1295 stxr(heap_end/*scratched*/, obj_end, top_addr); 1296 cbnz_w(heap_end, retry); 1297 #else 1298 atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); 1299 b(retry, ne); 1300 #endif // AARCH64 1301 } 1302 1303 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1304 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, 1305 RegisterOrConstant size_expression, Label& slow_case) { 1306 const Register tlab_end = tmp1; 1307 assert_different_registers(obj, obj_end, tlab_end); 1308 1309 ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); 1310 ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); 1311 add_rc(obj_end, obj, size_expression); 1312 cmp(obj_end, tlab_end); 1313 b(slow_case, hi); 1314 str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); 1315 } 1316 1317 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. 1318 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { 1319 Label loop; 1320 const Register ptr = start; 1321 1322 #ifdef AARCH64 1323 // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x 1324 const Register size = tmp; 1325 Label remaining, done; 1326 1327 sub(size, end, start); 1328 1329 #ifdef ASSERT 1330 { Label L; 1331 tst(size, wordSize - 1); 1332 b(L, eq); 1333 stop("size is not a multiple of wordSize"); 1334 bind(L); 1335 } 1336 #endif // ASSERT 1337 1338 subs(size, size, wordSize); 1339 b(remaining, le); 1340 1341 // Zero by 2 words per iteration. 
1342 bind(loop); 1343 subs(size, size, 2*wordSize); 1344 stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); 1345 b(loop, gt); 1346 1347 bind(remaining); 1348 b(done, ne); 1349 str(ZR, Address(ptr)); 1350 bind(done); 1351 #else 1352 mov(tmp, 0); 1353 bind(loop); 1354 cmp(ptr, end); 1355 str(tmp, Address(ptr, wordSize, post_indexed), lo); 1356 b(loop, lo); 1357 #endif // AARCH64 1358 } 1359 1360 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { 1361 #ifdef AARCH64 1362 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1363 add_rc(tmp, tmp, size_in_bytes); 1364 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1365 #else 1366 // Bump total bytes allocated by this thread 1367 Label done; 1368 1369 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1370 adds(tmp, tmp, size_in_bytes); 1371 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc); 1372 b(done, cc); 1373 1374 // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated) 1375 // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by 1376 // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself. 1377 Register low, high; 1378 // Select ether R0/R1 or R2/R3 1379 1380 if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { 1381 low = R2; 1382 high = R3; 1383 } else { 1384 low = R0; 1385 high = R1; 1386 } 1387 push(RegisterSet(low, high)); 1388 1389 ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1390 adds(low, low, size_in_bytes); 1391 adc(high, high, 0); 1392 strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1393 1394 pop(RegisterSet(low, high)); 1395 1396 bind(done); 1397 #endif // AARCH64 1398 } 1399 1400 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { 1401 // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM 1402 if (UseStackBanging) { 1403 const int page_size = os::vm_page_size(); 1404 1405 sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); 1406 strb(R0, Address(tmp)); 1407 #ifdef AARCH64 1408 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { 1409 sub(tmp, tmp, page_size); 1410 strb(R0, Address(tmp)); 1411 } 1412 #else 1413 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { 1414 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1415 } 1416 #endif // AARCH64 1417 } 1418 } 1419 1420 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) { 1421 if (UseStackBanging) { 1422 Label loop; 1423 1424 mov(tmp, SP); 1425 add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); 1426 #ifdef AARCH64 1427 sub(tmp, tmp, Rsize); 1428 bind(loop); 1429 subs(Rsize, Rsize, os::vm_page_size()); 1430 strb(ZR, Address(tmp, Rsize)); 1431 #else 1432 bind(loop); 1433 subs(Rsize, Rsize, 0xff0); 1434 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1435 #endif // AARCH64 1436 b(loop, hi); 1437 } 1438 } 1439 1440 void MacroAssembler::stop(const char* msg) { 1441 // This code pattern is matched in NativeIntruction::is_stop. 1442 // Update it at modifications. 
1443 #ifdef COMPILER1 1444 if (CommentedAssembly) { 1445 block_comment("stop"); 1446 } 1447 #endif 1448 1449 InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); 1450 InlinedString Lmsg(msg); 1451 1452 // save all registers for further inspection 1453 save_all_registers(); 1454 1455 ldr_literal(R0, Lmsg); // message 1456 mov(R1, SP); // register save area 1457 1458 #ifdef AARCH64 1459 ldr_literal(Rtemp, Ldebug); 1460 br(Rtemp); 1461 #else 1462 ldr_literal(PC, Ldebug); // call MacroAssembler::debug 1463 #endif // AARCH64 1464 1465 #if defined(COMPILER2) && defined(AARCH64) 1466 int off = offset(); 1467 #endif 1468 bind_literal(Lmsg); 1469 bind_literal(Ldebug); 1470 #if defined(COMPILER2) && defined(AARCH64) 1471 if (offset() - off == 2 * wordSize) { 1472 // no padding, so insert nop for worst-case sizing 1473 nop(); 1474 } 1475 #endif 1476 } 1477 1478 void MacroAssembler::warn(const char* msg) { 1479 #ifdef COMPILER1 1480 if (CommentedAssembly) { 1481 block_comment("warn"); 1482 } 1483 #endif 1484 1485 InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning)); 1486 InlinedString Lmsg(msg); 1487 Label done; 1488 1489 int push_size = save_caller_save_registers(); 1490 1491 #ifdef AARCH64 1492 // TODO-AARCH64 - get rid of extra debug parameters 1493 mov(R1, LR); 1494 mov(R2, FP); 1495 add(R3, SP, push_size); 1496 #endif 1497 1498 ldr_literal(R0, Lmsg); // message 1499 ldr_literal(LR, Lwarn); // call warning 1500 1501 call(LR); 1502 1503 restore_caller_save_registers(); 1504 1505 b(done); 1506 bind_literal(Lmsg); 1507 bind_literal(Lwarn); 1508 bind(done); 1509 } 1510 1511 1512 int MacroAssembler::save_all_registers() { 1513 // This code pattern is matched in NativeIntruction::is_save_all_registers. 1514 // Update it at modifications. 1515 #ifdef AARCH64 1516 const Register tmp = Rtemp; 1517 raw_push(R30, ZR); 1518 for (int i = 28; i >= 0; i -= 2) { 1519 raw_push(as_Register(i), as_Register(i+1)); 1520 } 1521 mov_pc_to(tmp); 1522 str(tmp, Address(SP, 31*wordSize)); 1523 ldr(tmp, Address(SP, tmp->encoding()*wordSize)); 1524 return 32*wordSize; 1525 #else 1526 push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); 1527 return 15*wordSize; 1528 #endif // AARCH64 1529 } 1530 1531 void MacroAssembler::restore_all_registers() { 1532 #ifdef AARCH64 1533 for (int i = 0; i <= 28; i += 2) { 1534 raw_pop(as_Register(i), as_Register(i+1)); 1535 } 1536 raw_pop(R30, ZR); 1537 #else 1538 pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers 1539 add(SP, SP, wordSize); // discard saved PC 1540 #endif // AARCH64 1541 } 1542 1543 int MacroAssembler::save_caller_save_registers() { 1544 #ifdef AARCH64 1545 for (int i = 0; i <= 16; i += 2) { 1546 raw_push(as_Register(i), as_Register(i+1)); 1547 } 1548 raw_push(R18, LR); 1549 return 20*wordSize; 1550 #else 1551 #if R9_IS_SCRATCHED 1552 // Save also R10 to preserve alignment 1553 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1554 return 8*wordSize; 1555 #else 1556 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); 1557 return 6*wordSize; 1558 #endif 1559 #endif // AARCH64 1560 } 1561 1562 void MacroAssembler::restore_caller_save_registers() { 1563 #ifdef AARCH64 1564 raw_pop(R18, LR); 1565 for (int i = 16; i >= 0; i -= 2) { 1566 raw_pop(as_Register(i), as_Register(i+1)); 1567 } 1568 #else 1569 #if R9_IS_SCRATCHED 1570 pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1571 #else 1572 pop(RegisterSet(R0, R3) | RegisterSet(R12) | 
RegisterSet(LR)); 1573 #endif 1574 #endif // AARCH64 1575 } 1576 1577 void MacroAssembler::debug(const char* msg, const intx* registers) { 1578 // In order to get locks to work, we need to fake a in_VM state 1579 JavaThread* thread = JavaThread::current(); 1580 thread->set_thread_state(_thread_in_vm); 1581 1582 if (ShowMessageBoxOnError) { 1583 ttyLocker ttyl; 1584 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 1585 BytecodeCounter::print(); 1586 } 1587 if (os::message_box(msg, "Execution stopped, print registers?")) { 1588 #ifdef AARCH64 1589 // saved registers: R0-R30, PC 1590 const int nregs = 32; 1591 #else 1592 // saved registers: R0-R12, LR, PC 1593 const int nregs = 15; 1594 const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; 1595 #endif // AARCH64 1596 1597 for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { 1598 tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); 1599 } 1600 1601 #ifdef AARCH64 1602 tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); 1603 #endif // AARCH64 1604 1605 // derive original SP value from the address of register save area 1606 tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); 1607 } 1608 BREAKPOINT; 1609 } else { 1610 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 1611 } 1612 assert(false, "DEBUG MESSAGE: %s", msg); 1613 fatal("%s", msg); // returning from MacroAssembler::debug is not supported 1614 } 1615 1616 void MacroAssembler::unimplemented(const char* what) { 1617 const char* buf = NULL; 1618 { 1619 ResourceMark rm; 1620 stringStream ss; 1621 ss.print("unimplemented: %s", what); 1622 buf = code_string(ss.as_string()); 1623 } 1624 stop(buf); 1625 } 1626 1627 1628 // Implementation of FixedSizeCodeBlock 1629 1630 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) : 1631 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) { 1632 } 1633 1634 FixedSizeCodeBlock::~FixedSizeCodeBlock() { 1635 if (_enabled) { 1636 address curr_pc = _masm->pc(); 1637 1638 assert(_start < curr_pc, "invalid current pc"); 1639 guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long"); 1640 1641 int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs; 1642 for (int i = 0; i < nops_count; i++) { 1643 _masm->nop(); 1644 } 1645 } 1646 } 1647 1648 #ifdef AARCH64 1649 1650 // Serializes memory. 1651 // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM 1652 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { 1653 if (!os::is_MP()) return; 1654 1655 // TODO-AARCH64 investigate dsb vs dmb effects 1656 if (order_constraint == StoreStore) { 1657 dmb(DMB_st); 1658 } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { 1659 dmb(DMB_ld); 1660 } else { 1661 dmb(DMB_all); 1662 } 1663 } 1664 1665 #else 1666 1667 // Serializes memory. Potentially blows flags and reg. 1668 // tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions) 1669 // preserve_flags takes a longer path in LoadStore case (dmb rather then control dependency) to preserve status flags. Optional. 1670 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional. 
1671 void MacroAssembler::membar(Membar_mask_bits order_constraint, 1672 Register tmp, 1673 bool preserve_flags, 1674 Register load_tgt) { 1675 if (!os::is_MP()) return; 1676 1677 if (order_constraint == StoreStore) { 1678 dmb(DMB_st, tmp); 1679 } else if ((order_constraint & StoreLoad) || 1680 (order_constraint & LoadLoad) || 1681 (order_constraint & StoreStore) || 1682 (load_tgt == noreg) || 1683 preserve_flags) { 1684 dmb(DMB_all, tmp); 1685 } else { 1686 // LoadStore: speculative stores reordeing is prohibited 1687 1688 // By providing an ordered load target register, we avoid an extra memory load reference 1689 Label not_taken; 1690 bind(not_taken); 1691 cmp(load_tgt, load_tgt); 1692 b(not_taken, ne); 1693 } 1694 } 1695 1696 #endif // AARCH64 1697 1698 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" 1699 // on failure, so fall-through can only mean success. 1700 // "one_shot" controls whether we loop and retry to mitigate spurious failures. 1701 // This is only needed for C2, which for some reason does not rety, 1702 // while C1/interpreter does. 1703 // TODO: measure if it makes a difference 1704 1705 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, 1706 Register base, Register tmp, Label &slow_case, 1707 bool allow_fallthrough_on_failure, bool one_shot) 1708 { 1709 1710 bool fallthrough_is_success = false; 1711 1712 // ARM Litmus Test example does prefetching here. 1713 // TODO: investigate if it helps performance 1714 1715 // The last store was to the displaced header, so to prevent 1716 // reordering we must issue a StoreStore or Release barrier before 1717 // the CAS store. 1718 1719 #ifdef AARCH64 1720 1721 Register Rscratch = tmp; 1722 Register Roop = base; 1723 Register mark = oldval; 1724 Register Rbox = newval; 1725 Label loop; 1726 1727 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1728 1729 // Instead of StoreStore here, we use store-release-exclusive below 1730 1731 bind(loop); 1732 1733 ldaxr(tmp, base); // acquire 1734 cmp(tmp, oldval); 1735 b(slow_case, ne); 1736 stlxr(tmp, newval, base); // release 1737 if (one_shot) { 1738 cmp_w(tmp, 0); 1739 } else { 1740 cbnz_w(tmp, loop); 1741 fallthrough_is_success = true; 1742 } 1743 1744 // MemBarAcquireLock would normally go here, but 1745 // we already do ldaxr+stlxr above, which has 1746 // Sequential Consistency 1747 1748 #else 1749 membar(MacroAssembler::StoreStore, noreg); 1750 1751 if (one_shot) { 1752 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1753 cmp(tmp, oldval); 1754 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1755 cmp(tmp, 0, eq); 1756 } else { 1757 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1758 } 1759 1760 // MemBarAcquireLock barrier 1761 // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, 1762 // but that doesn't prevent a load or store from floating up between 1763 // the load and store in the CAS sequence, so play it safe and 1764 // do a full fence. 
1765 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); 1766 #endif 1767 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1768 b(slow_case, ne); 1769 } 1770 } 1771 1772 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, 1773 Register base, Register tmp, Label &slow_case, 1774 bool allow_fallthrough_on_failure, bool one_shot) 1775 { 1776 1777 bool fallthrough_is_success = false; 1778 1779 assert_different_registers(oldval,newval,base,tmp); 1780 1781 #ifdef AARCH64 1782 Label loop; 1783 1784 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1785 1786 bind(loop); 1787 ldxr(tmp, base); 1788 cmp(tmp, oldval); 1789 b(slow_case, ne); 1790 // MemBarReleaseLock barrier 1791 stlxr(tmp, newval, base); 1792 if (one_shot) { 1793 cmp_w(tmp, 0); 1794 } else { 1795 cbnz_w(tmp, loop); 1796 fallthrough_is_success = true; 1797 } 1798 #else 1799 // MemBarReleaseLock barrier 1800 // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, 1801 // but that doesn't prevent a load or store from floating down between 1802 // the load and store in the CAS sequence, so play it safe and 1803 // do a full fence. 1804 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp); 1805 1806 if (one_shot) { 1807 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1808 cmp(tmp, oldval); 1809 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1810 cmp(tmp, 0, eq); 1811 } else { 1812 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1813 } 1814 #endif 1815 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1816 b(slow_case, ne); 1817 } 1818 1819 // ExitEnter 1820 // According to JSR-133 Cookbook, this should be StoreLoad, the same 1821 // barrier that follows volatile store. 1822 // TODO: Should be able to remove on armv8 if volatile loads 1823 // use the load-acquire instruction. 1824 membar(StoreLoad, noreg); 1825 } 1826 1827 #ifndef PRODUCT 1828 1829 // Preserves flags and all registers. 1830 // On SMP the updated value might not be visible to external observers without a sychronization barrier 1831 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) { 1832 if (counter_addr != NULL) { 1833 InlinedAddress counter_addr_literal((address)counter_addr); 1834 Label done, retry; 1835 if (cond != al) { 1836 b(done, inverse(cond)); 1837 } 1838 1839 #ifdef AARCH64 1840 raw_push(R0, R1); 1841 raw_push(R2, ZR); 1842 1843 ldr_literal(R0, counter_addr_literal); 1844 1845 bind(retry); 1846 ldxr_w(R1, R0); 1847 add_w(R1, R1, 1); 1848 stxr_w(R2, R1, R0); 1849 cbnz_w(R2, retry); 1850 1851 raw_pop(R2, ZR); 1852 raw_pop(R0, R1); 1853 #else 1854 push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1855 ldr_literal(R0, counter_addr_literal); 1856 1857 mrs(CPSR, Rtemp); 1858 1859 bind(retry); 1860 ldr_s32(R1, Address(R0)); 1861 add(R2, R1, 1); 1862 atomic_cas_bool(R1, R2, R0, 0, R3); 1863 b(retry, ne); 1864 1865 msr(CPSR_fsxc, Rtemp); 1866 1867 pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1868 #endif // AARCH64 1869 1870 b(done); 1871 bind_literal(counter_addr_literal); 1872 1873 bind(done); 1874 } 1875 } 1876 1877 #endif // !PRODUCT 1878 1879 1880 // Building block for CAS cases of biased locking: makes CAS and records statistics. 1881 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. 
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if the bias locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  // that was used afterwards in some call paths. Callers
  // have been fixed so that swap_reg no longer needs to be
  // saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then it has already
  // been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On MP platform loads could return 'stale' values in some cases.
  // That is acceptable since either CAS or slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
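  // For reference (layout assumed here, see markOop.hpp): with biased locking
  // enabled the mark word is laid out as
  //   [ JavaThread* owner | epoch (2) | age (4) | biased_lock (1) | lock (2) ]
  // (64-bit builds also keep one cms bit between age and epoch). The sequence
  // below xors the mark with (prototype_header | Rthread); the result is zero
  // in the owner, epoch and pattern bits iff the object is still biased toward
  // the current thread in the current epoch, and only the age bits (which are
  // masked off) may legitimately differ.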
1949 load_klass(tmp_reg, obj_reg); 1950 ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 1951 orr(tmp_reg, tmp_reg, Rthread); 1952 eor(tmp_reg, tmp_reg, swap_reg); 1953 1954 #ifdef AARCH64 1955 ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); 1956 #else 1957 bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); 1958 #endif // AARCH64 1959 1960 #ifndef PRODUCT 1961 if (counters != NULL) { 1962 cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); 1963 } 1964 #endif // !PRODUCT 1965 1966 b(done, eq); 1967 1968 Label try_revoke_bias; 1969 Label try_rebias; 1970 1971 // At this point we know that the header has the bias pattern and 1972 // that we are not the bias owner in the current epoch. We need to 1973 // figure out more details about the state of the header in order to 1974 // know what operations can be legally performed on the object's 1975 // header. 1976 1977 // If the low three bits in the xor result aren't clear, that means 1978 // the prototype header is no longer biased and we have to revoke 1979 // the bias on this object. 1980 tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 1981 b(try_revoke_bias, ne); 1982 1983 // Biasing is still enabled for this data type. See whether the 1984 // epoch of the current bias is still valid, meaning that the epoch 1985 // bits of the mark word are equal to the epoch bits of the 1986 // prototype header. (Note that the prototype header's epoch bits 1987 // only change at a safepoint.) If not, attempt to rebias the object 1988 // toward the current thread. Note that we must be absolutely sure 1989 // that the current epoch is invalid in order to do this because 1990 // otherwise the manipulations it performs on the mark word are 1991 // illegal. 1992 tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); 1993 b(try_rebias, ne); 1994 1995 // tmp_reg has the age, epoch and pattern bits cleared 1996 // The remaining (owner) bits are (Thread ^ current_owner) 1997 1998 // The epoch of the current bias is still valid but we know nothing 1999 // about the owner; it might be set or it might be clear. Try to 2000 // acquire the bias of the object using an atomic operation. If this 2001 // fails we will go in to the runtime to revoke the object's bias. 2002 // Note that we first construct the presumed unbiased header so we 2003 // don't accidentally blow away another thread's valid bias. 2004 2005 // Note that we know the owner is not ourself. Hence, success can 2006 // only happen when the owner bits is 0 2007 2008 #ifdef AARCH64 2009 // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has 2010 // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
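  // For illustration (constants assumed from the markOop layout, not asserted
  // here): on 64-bit the union of the three masks is 0x37f, with bit 7 (the
  // cms bit) clear, so it is not a valid AArch64 logical immediate and has to
  // be materialized with a separate mov. On 32-bit the same union is the
  // contiguous 0x1ff, which is exactly what the assert in the #else branch
  // below checks.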
2011 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2012 andr(swap_reg, swap_reg, tmp2); 2013 #else 2014 // until the assembler can be made smarter, we need to make some assumptions about the values 2015 // so we can optimize this: 2016 assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); 2017 2018 mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); 2019 mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) 2020 #endif // AARCH64 2021 2022 orr(tmp_reg, swap_reg, Rthread); // new mark 2023 2024 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2025 (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); 2026 2027 // If the biasing toward our thread failed, this means that 2028 // another thread succeeded in biasing it toward itself and we 2029 // need to revoke that bias. The revocation will occur in the 2030 // interpreter runtime in the slow case. 2031 2032 b(done); 2033 2034 bind(try_rebias); 2035 2036 // At this point we know the epoch has expired, meaning that the 2037 // current "bias owner", if any, is actually invalid. Under these 2038 // circumstances _only_, we are allowed to use the current header's 2039 // value as the comparison value when doing the cas to acquire the 2040 // bias in the current epoch. In other words, we allow transfer of 2041 // the bias from one thread to another directly in this situation. 2042 2043 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2044 2045 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2046 2047 // owner bits 'random'. Set them to Rthread. 2048 #ifdef AARCH64 2049 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2050 andr(tmp_reg, tmp_reg, tmp2); 2051 #else 2052 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2053 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2054 #endif // AARCH64 2055 2056 orr(tmp_reg, tmp_reg, Rthread); // new mark 2057 2058 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2059 (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); 2060 2061 // If the biasing toward our thread failed, then another thread 2062 // succeeded in biasing it toward itself and we need to revoke that 2063 // bias. The revocation will occur in the runtime in the slow case. 2064 2065 b(done); 2066 2067 bind(try_revoke_bias); 2068 2069 // The prototype mark in the klass doesn't have the bias bit set any 2070 // more, indicating that objects of this data type are not supposed 2071 // to be biased any more. We are going to try to reset the mark of 2072 // this object to the prototype value and fall through to the 2073 // CAS-based locking scheme. Note that if our CAS fails, it means 2074 // that another thread raced us for the privilege of revoking the 2075 // bias of this particular object, so it's okay to continue in the 2076 // normal locking code. 2077 2078 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2079 2080 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2081 2082 // owner bits 'random'. 
Clear them 2083 #ifdef AARCH64 2084 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2085 andr(tmp_reg, tmp_reg, tmp2); 2086 #else 2087 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2088 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2089 #endif // AARCH64 2090 2091 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, 2092 (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); 2093 2094 // Fall through to the normal CAS-based lock, because no matter what 2095 // the result of the above CAS, some thread must have succeeded in 2096 // removing the bias bit from the object's header. 2097 2098 bind(cas_label); 2099 2100 return null_check_offset; 2101 } 2102 2103 2104 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { 2105 assert(UseBiasedLocking, "why call this otherwise?"); 2106 2107 // Check for biased locking unlock case, which is a no-op 2108 // Note: we do not have to check the thread ID for two reasons. 2109 // First, the interpreter checks for IllegalMonitorStateException at 2110 // a higher level. Second, if the bias was revoked while we held the 2111 // lock, the object could not be rebiased toward another thread, so 2112 // the bias bit would be clear. 2113 ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2114 2115 andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2116 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2117 b(done, eq); 2118 } 2119 2120 2121 void MacroAssembler::resolve_jobject(Register value, 2122 Register tmp1, 2123 Register tmp2) { 2124 assert_different_registers(value, tmp1, tmp2); 2125 Label done, not_weak; 2126 cbz(value, done); // Use NULL as-is. 2127 STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); 2128 tbz(value, 0, not_weak); // Test for jweak tag. 2129 2130 // Resolve jweak. 2131 access_load_at(T_OBJECT, IN_ROOT | ON_PHANTOM_OOP_REF, 2132 Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg); 2133 b(done); 2134 bind(not_weak); 2135 // Resolve (untagged) jobject. 2136 access_load_at(T_OBJECT, IN_ROOT | IN_CONCURRENT_ROOT, 2137 Address(value, 0), value, tmp1, tmp2, noreg); 2138 verify_oop(value); 2139 bind(done); 2140 } 2141 2142 2143 ////////////////////////////////////////////////////////////////////////////////// 2144 2145 #ifdef AARCH64 2146 2147 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2148 switch (size_in_bytes) { 2149 case 8: ldr(dst, src); break; 2150 case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; 2151 case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; 2152 case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; 2153 default: ShouldNotReachHere(); 2154 } 2155 } 2156 2157 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 2158 switch (size_in_bytes) { 2159 case 8: str(src, dst); break; 2160 case 4: str_32(src, dst); break; 2161 case 2: strh(src, dst); break; 2162 case 1: strb(src, dst); break; 2163 default: ShouldNotReachHere(); 2164 } 2165 } 2166 2167 #else 2168 2169 void MacroAssembler::load_sized_value(Register dst, Address src, 2170 size_t size_in_bytes, bool is_signed, AsmCondition cond) { 2171 switch (size_in_bytes) { 2172 case 4: ldr(dst, src, cond); break; 2173 case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; 2174 case 1: is_signed ? 
ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; 2175 default: ShouldNotReachHere(); 2176 } 2177 } 2178 2179 2180 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { 2181 switch (size_in_bytes) { 2182 case 4: str(src, dst, cond); break; 2183 case 2: strh(src, dst, cond); break; 2184 case 1: strb(src, dst, cond); break; 2185 default: ShouldNotReachHere(); 2186 } 2187 } 2188 #endif // AARCH64 2189 2190 // Look up the method for a megamorphic invokeinterface call. 2191 // The target method is determined by <Rinterf, Rindex>. 2192 // The receiver klass is in Rklass. 2193 // On success, the result will be in method_result, and execution falls through. 2194 // On failure, execution transfers to the given label. 2195 void MacroAssembler::lookup_interface_method(Register Rklass, 2196 Register Rintf, 2197 RegisterOrConstant itable_index, 2198 Register method_result, 2199 Register Rscan, 2200 Register Rtmp, 2201 Label& L_no_such_interface) { 2202 2203 assert_different_registers(Rklass, Rintf, Rscan, Rtmp); 2204 2205 const int entry_size = itableOffsetEntry::size() * HeapWordSize; 2206 assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience"); 2207 2208 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 2209 const int base = in_bytes(Klass::vtable_start_offset()); 2210 const int scale = exact_log2(vtableEntry::size_in_bytes()); 2211 ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable 2212 add(Rscan, Rklass, base); 2213 add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale)); 2214 2215 // Search through the itable for an interface equal to incoming Rintf 2216 // itable looks like [intface][offset][intface][offset][intface][offset] 2217 2218 Label loop; 2219 bind(loop); 2220 ldr(Rtmp, Address(Rscan, entry_size, post_indexed)); 2221 #ifdef AARCH64 2222 Label found; 2223 cmp(Rtmp, Rintf); 2224 b(found, eq); 2225 cbnz(Rtmp, loop); 2226 #else 2227 cmp(Rtmp, Rintf); // set ZF and CF if interface is found 2228 cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is 2229 b(loop, ne); 2230 #endif // AARCH64 2231 2232 #ifdef AARCH64 2233 b(L_no_such_interface); 2234 bind(found); 2235 #else 2236 // CF == 0 means we reached the end of itable without finding icklass 2237 b(L_no_such_interface, cc); 2238 #endif // !AARCH64 2239 2240 if (method_result != noreg) { 2241 // Interface found at previous position of Rscan, now load the method 2242 ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size)); 2243 if (itable_index.is_register()) { 2244 add(Rtmp, Rtmp, Rklass); // Add offset to Klass* 2245 assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); 2246 assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); 2247 ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register())); 2248 } else { 2249 int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() + 2250 itableMethodEntry::method_offset_in_bytes(); 2251 add_slow(method_result, Rklass, method_offset); 2252 ldr(method_result, Address(method_result, Rtmp)); 2253 } 2254 } 2255 } 2256 2257 #ifdef COMPILER2 2258 // TODO: 8 bytes at a time? pre-fetch? 2259 // Compare char[] arrays aligned to 4 bytes. 
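// A C-level sketch (illustration only, byte-indexed like the assembly below)
// of the strategy: compare a possible odd trailing char first, then compare
// the remainder one 32-bit word (two chars) at a time using a negative index
// that walks up to zero. On any mismatch result is set to 0 and control goes
// to Ldone; the caller is responsible for setting result to 1 on the
// fall-through (equal) path.
//
//   if (limit & 2) {                                            // odd number of chars?
//     limit -= sizeof(jchar);
//     if (*(jchar*)(ary1 + limit) != *(jchar*)(ary2 + limit)) return 0;
//     if (limit == 0) return 1;                                 // single-char arrays
//   }
//   ary1 += limit; ary2 += limit; limit = -limit;
//   do {
//     if (*(juint*)(ary1 + limit) != *(juint*)(ary2 + limit)) return 0;
//     limit += 2 * sizeof(jchar);
//   } while (limit != 0);
//   // equal: fall through, caller sets result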
2260 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, 2261 Register limit, Register result, 2262 Register chr1, Register chr2, Label& Ldone) { 2263 Label Lvector, Lloop; 2264 2265 // Note: limit contains number of bytes (2*char_elements) != 0. 2266 tst(limit, 0x2); // trailing character ? 2267 b(Lvector, eq); 2268 2269 // compare the trailing char 2270 sub(limit, limit, sizeof(jchar)); 2271 ldrh(chr1, Address(ary1, limit)); 2272 ldrh(chr2, Address(ary2, limit)); 2273 cmp(chr1, chr2); 2274 mov(result, 0, ne); // not equal 2275 b(Ldone, ne); 2276 2277 // only one char ? 2278 tst(limit, limit); 2279 mov(result, 1, eq); 2280 b(Ldone, eq); 2281 2282 // word by word compare, dont't need alignment check 2283 bind(Lvector); 2284 2285 // Shift ary1 and ary2 to the end of the arrays, negate limit 2286 add(ary1, limit, ary1); 2287 add(ary2, limit, ary2); 2288 neg(limit, limit); 2289 2290 bind(Lloop); 2291 ldr_u32(chr1, Address(ary1, limit)); 2292 ldr_u32(chr2, Address(ary2, limit)); 2293 cmp_32(chr1, chr2); 2294 mov(result, 0, ne); // not equal 2295 b(Ldone, ne); 2296 adds(limit, limit, 2*sizeof(jchar)); 2297 b(Lloop, ne); 2298 2299 // Caller should set it: 2300 // mov(result_reg, 1); //equal 2301 } 2302 #endif 2303 2304 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { 2305 mov_slow(tmpreg1, counter_addr); 2306 ldr_s32(tmpreg2, tmpreg1); 2307 add_32(tmpreg2, tmpreg2, 1); 2308 str_32(tmpreg2, tmpreg1); 2309 } 2310 2311 void MacroAssembler::floating_cmp(Register dst) { 2312 #ifdef AARCH64 2313 NOT_TESTED(); 2314 cset(dst, gt); // 1 if '>', else 0 2315 csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 2316 #else 2317 vmrs(dst, FPSCR); 2318 orr(dst, dst, 0x08000000); 2319 eor(dst, dst, AsmOperand(dst, lsl, 3)); 2320 mov(dst, AsmOperand(dst, asr, 30)); 2321 #endif 2322 } 2323 2324 void MacroAssembler::restore_default_fp_mode() { 2325 #ifdef AARCH64 2326 msr(SysReg_FPCR, ZR); 2327 #else 2328 #ifndef __SOFTFP__ 2329 // Round to Near mode, IEEE compatible, masked exceptions 2330 mov(Rtemp, 0); 2331 vmsr(FPSCR, Rtemp); 2332 #endif // !__SOFTFP__ 2333 #endif // AARCH64 2334 } 2335 2336 #ifndef AARCH64 2337 // 24-bit word range == 26-bit byte range 2338 bool check26(int offset) { 2339 // this could be simplified, but it mimics encoding and decoding 2340 // an actual branch insrtuction 2341 int off1 = offset << 6 >> 8; 2342 int encoded = off1 & ((1<<24)-1); 2343 int decoded = encoded << 8 >> 6; 2344 return offset == decoded; 2345 } 2346 #endif // !AARCH64 2347 2348 // Perform some slight adjustments so the default 32MB code cache 2349 // is fully reachable. 2350 static inline address first_cache_address() { 2351 return CodeCache::low_bound() + sizeof(HeapBlock::Header); 2352 } 2353 static inline address last_cache_address() { 2354 return CodeCache::high_bound() - Assembler::InstructionSize; 2355 } 2356 2357 #ifdef AARCH64 2358 // Can we reach target using ADRP? 2359 bool MacroAssembler::page_reachable_from_cache(address target) { 2360 intptr_t cl = (intptr_t)first_cache_address() & ~0xfff; 2361 intptr_t ch = (intptr_t)last_cache_address() & ~0xfff; 2362 intptr_t addr = (intptr_t)target & ~0xfff; 2363 2364 intptr_t loffset = addr - cl; 2365 intptr_t hoffset = addr - ch; 2366 return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0); 2367 } 2368 #endif 2369 2370 // Can we reach target using unconditional branch or call from anywhere 2371 // in the code cache (because code can be relocated)? 
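// For context (architectural limits, stated here for illustration): an ARM
// B/BL instruction encodes a signed 24-bit word offset, i.e. roughly +/-32MB
// of byte displacement. check26 above verifies that a byte offset survives a
// round trip through that encoding (for example 0x01fffffc passes while
// 0x02000000 does not). An AArch64 B/BL encodes a signed 26-bit word offset,
// roughly +/-128MB. A target is treated as reachable only if it can be
// reached from both the lowest and the highest possible code cache address,
// since the calling code may later be relocated anywhere within the cache.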
2372 bool MacroAssembler::_reachable_from_cache(address target) { 2373 #ifdef __thumb__ 2374 if ((1 & (intptr_t)target) != 0) { 2375 // Return false to avoid 'b' if we need switching to THUMB mode. 2376 return false; 2377 } 2378 #endif 2379 2380 address cl = first_cache_address(); 2381 address ch = last_cache_address(); 2382 2383 if (ForceUnreachable) { 2384 // Only addresses from CodeCache can be treated as reachable. 2385 if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) { 2386 return false; 2387 } 2388 } 2389 2390 intptr_t loffset = (intptr_t)target - (intptr_t)cl; 2391 intptr_t hoffset = (intptr_t)target - (intptr_t)ch; 2392 2393 #ifdef AARCH64 2394 return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); 2395 #else 2396 return check26(loffset - 8) && check26(hoffset - 8); 2397 #endif 2398 } 2399 2400 bool MacroAssembler::reachable_from_cache(address target) { 2401 assert(CodeCache::contains(pc()), "not supported"); 2402 return _reachable_from_cache(target); 2403 } 2404 2405 // Can we reach the entire code cache from anywhere else in the code cache? 2406 bool MacroAssembler::_cache_fully_reachable() { 2407 address cl = first_cache_address(); 2408 address ch = last_cache_address(); 2409 return _reachable_from_cache(cl) && _reachable_from_cache(ch); 2410 } 2411 2412 bool MacroAssembler::cache_fully_reachable() { 2413 assert(CodeCache::contains(pc()), "not supported"); 2414 return _cache_fully_reachable(); 2415 } 2416 2417 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2418 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2419 if (reachable_from_cache(target)) { 2420 relocate(rtype); 2421 b(target NOT_AARCH64_ARG(cond)); 2422 return; 2423 } 2424 2425 // Note: relocate is not needed for the code below, 2426 // encoding targets in absolute format. 2427 if (ignore_non_patchable_relocations()) { 2428 rtype = relocInfo::none; 2429 } 2430 2431 #ifdef AARCH64 2432 assert (scratch != noreg, "should be specified"); 2433 InlinedAddress address_literal(target, rtype); 2434 ldr_literal(scratch, address_literal); 2435 br(scratch); 2436 int off = offset(); 2437 bind_literal(address_literal); 2438 #ifdef COMPILER2 2439 if (offset() - off == wordSize) { 2440 // no padding, so insert nop for worst-case sizing 2441 nop(); 2442 } 2443 #endif 2444 #else 2445 if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { 2446 // Note: this version cannot be (atomically) patched 2447 mov_slow(scratch, (intptr_t)target, cond); 2448 bx(scratch, cond); 2449 } else { 2450 Label skip; 2451 InlinedAddress address_literal(target); 2452 if (cond != al) { 2453 b(skip, inverse(cond)); 2454 } 2455 relocate(rtype); 2456 ldr_literal(PC, address_literal); 2457 bind_literal(address_literal); 2458 bind(skip); 2459 } 2460 #endif // AARCH64 2461 } 2462 2463 // Similar to jump except that: 2464 // - near calls are valid only if any destination in the cache is near 2465 // - no movt/movw (not atomically patchable) 2466 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2467 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2468 if (cache_fully_reachable()) { 2469 // Note: this assumes that all possible targets (the initial one 2470 // and the addressed patched to) are all in the code cache. 
2471 assert(CodeCache::contains(target), "target might be too far"); 2472 relocate(rtype); 2473 b(target NOT_AARCH64_ARG(cond)); 2474 return; 2475 } 2476 2477 // Discard the relocation information if not needed for CacheCompiledCode 2478 // since the next encodings are all in absolute format. 2479 if (ignore_non_patchable_relocations()) { 2480 rtype = relocInfo::none; 2481 } 2482 2483 #ifdef AARCH64 2484 assert (scratch != noreg, "should be specified"); 2485 InlinedAddress address_literal(target); 2486 relocate(rtype); 2487 ldr_literal(scratch, address_literal); 2488 br(scratch); 2489 int off = offset(); 2490 bind_literal(address_literal); 2491 #ifdef COMPILER2 2492 if (offset() - off == wordSize) { 2493 // no padding, so insert nop for worst-case sizing 2494 nop(); 2495 } 2496 #endif 2497 #else 2498 { 2499 Label skip; 2500 InlinedAddress address_literal(target); 2501 if (cond != al) { 2502 b(skip, inverse(cond)); 2503 } 2504 relocate(rtype); 2505 ldr_literal(PC, address_literal); 2506 bind_literal(address_literal); 2507 bind(skip); 2508 } 2509 #endif // AARCH64 2510 } 2511 2512 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { 2513 Register scratch = LR; 2514 assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); 2515 if (reachable_from_cache(target)) { 2516 relocate(rspec); 2517 bl(target NOT_AARCH64_ARG(cond)); 2518 return; 2519 } 2520 2521 // Note: relocate is not needed for the code below, 2522 // encoding targets in absolute format. 2523 if (ignore_non_patchable_relocations()) { 2524 // This assumes the information was needed only for relocating the code. 2525 rspec = RelocationHolder::none; 2526 } 2527 2528 #ifndef AARCH64 2529 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { 2530 // Note: this version cannot be (atomically) patched 2531 mov_slow(scratch, (intptr_t)target, cond); 2532 blx(scratch, cond); 2533 return; 2534 } 2535 #endif 2536 2537 { 2538 Label ret_addr; 2539 #ifndef AARCH64 2540 if (cond != al) { 2541 b(ret_addr, inverse(cond)); 2542 } 2543 #endif 2544 2545 2546 #ifdef AARCH64 2547 // TODO-AARCH64: make more optimal implementation 2548 // [ Keep in sync with MacroAssembler::call_size ] 2549 assert(rspec.type() == relocInfo::none, "call reloc not implemented"); 2550 mov_slow(scratch, target); 2551 blr(scratch); 2552 #else 2553 InlinedAddress address_literal(target); 2554 relocate(rspec); 2555 adr(LR, ret_addr); 2556 ldr_literal(PC, address_literal); 2557 2558 bind_literal(address_literal); 2559 bind(ret_addr); 2560 #endif 2561 } 2562 } 2563 2564 #if defined(AARCH64) && defined(COMPILER2) 2565 int MacroAssembler::call_size(address target, bool far, bool patchable) { 2566 // FIXME: mov_slow is variable-length 2567 if (!far) return 1; // bl 2568 if (patchable) return 2; // ldr; blr 2569 return instr_count_for_mov_slow((intptr_t)target) + 1; 2570 } 2571 #endif 2572 2573 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { 2574 assert(rspec.type() == relocInfo::static_call_type || 2575 rspec.type() == relocInfo::none || 2576 rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); 2577 2578 // Always generate the relocation information, needed for patching 2579 relocate(rspec); // used by NativeCall::is_call_before() 2580 if (cache_fully_reachable()) { 2581 // Note: this assumes that all possible targets (the initial one 2582 // and the addresses patched to) are all in the code cache. 
2583 assert(CodeCache::contains(target), "target might be too far"); 2584 bl(target); 2585 } else { 2586 #if defined(AARCH64) && defined(COMPILER2) 2587 if (c2) { 2588 // return address needs to match call_size(). 2589 // no need to trash Rtemp 2590 int off = offset(); 2591 Label skip_literal; 2592 InlinedAddress address_literal(target); 2593 ldr_literal(LR, address_literal); 2594 blr(LR); 2595 int ret_addr_offset = offset(); 2596 assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); 2597 b(skip_literal); 2598 int off2 = offset(); 2599 bind_literal(address_literal); 2600 if (offset() - off2 == wordSize) { 2601 // no padding, so insert nop for worst-case sizing 2602 nop(); 2603 } 2604 bind(skip_literal); 2605 return ret_addr_offset; 2606 } 2607 #endif 2608 Label ret_addr; 2609 InlinedAddress address_literal(target); 2610 #ifdef AARCH64 2611 ldr_literal(Rtemp, address_literal); 2612 adr(LR, ret_addr); 2613 br(Rtemp); 2614 #else 2615 adr(LR, ret_addr); 2616 ldr_literal(PC, address_literal); 2617 #endif 2618 bind_literal(address_literal); 2619 bind(ret_addr); 2620 } 2621 return offset(); 2622 } 2623 2624 // ((OopHandle)result).resolve(); 2625 void MacroAssembler::resolve_oop_handle(Register result) { 2626 // OopHandle::resolve is an indirection. 2627 ldr(result, Address(result, 0)); 2628 } 2629 2630 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { 2631 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2632 ldr(tmp, Address(method, Method::const_offset())); 2633 ldr(tmp, Address(tmp, ConstMethod::constants_offset())); 2634 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); 2635 ldr(mirror, Address(tmp, mirror_offset)); 2636 resolve_oop_handle(mirror); 2637 } 2638 2639 2640 /////////////////////////////////////////////////////////////////////////////// 2641 2642 // Compressed pointers 2643 2644 #ifdef AARCH64 2645 2646 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { 2647 if (UseCompressedClassPointers) { 2648 ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2649 decode_klass_not_null(dst_klass); 2650 } else { 2651 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2652 } 2653 } 2654 2655 #else 2656 2657 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { 2658 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); 2659 } 2660 2661 #endif // AARCH64 2662 2663 // Blows src_klass. 2664 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { 2665 #ifdef AARCH64 2666 if (UseCompressedClassPointers) { 2667 assert(src_klass != dst_oop, "not enough registers"); 2668 encode_klass_not_null(src_klass); 2669 str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2670 return; 2671 } 2672 #endif // AARCH64 2673 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2674 } 2675 2676 #ifdef AARCH64 2677 2678 void MacroAssembler::store_klass_gap(Register dst) { 2679 if (UseCompressedClassPointers) { 2680 str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2681 } 2682 } 2683 2684 #endif // AARCH64 2685 2686 2687 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { 2688 access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3); 2689 } 2690 2691 // Blows src and flags. 
void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
}

void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
}

void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
                                    Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  } else {
    bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
  }
}

void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
                                     Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bool as_raw = (decorators & AS_RAW) != 0;
  if (as_raw) {
    bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  } else {
    bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
  }
}


#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
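// As a plain-C reminder (sketch only; oop.inline.hpp is the authoritative
// version), encoding with a non-NULL narrow_oop_base and a shift is
//   narrow   = (uint32_t)((oop_addr - narrow_oop_base) >> narrow_oop_shift);
// and decoding is the inverse
//   oop_addr = narrow_oop_base + ((uintptr_t)narrow << narrow_oop_shift);
// The _not_null variants below rely on the argument being non-NULL and can
// therefore omit the NULL special-casing (the tst/csel against Rheap_base/ZR)
// performed by encode_heap_oop/decode_heap_oop above.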
2773 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2774 assert (UseCompressedOops, "must be compressed"); 2775 assert (Universe::heap() != NULL, "java heap should be initialized"); 2776 #ifdef ASSERT 2777 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2778 #endif 2779 verify_oop(src); 2780 if (Universe::narrow_oop_base() == NULL) { 2781 if (Universe::narrow_oop_shift() != 0) { 2782 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2783 _lsr(dst, src, Universe::narrow_oop_shift()); 2784 } else if (dst != src) { 2785 mov(dst, src); 2786 } 2787 } else { 2788 sub(dst, src, Rheap_base); 2789 if (Universe::narrow_oop_shift() != 0) { 2790 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2791 _lsr(dst, dst, Universe::narrow_oop_shift()); 2792 } 2793 } 2794 } 2795 2796 // Same algorithm as oops.inline.hpp decode_heap_oop. 2797 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule 2798 // must be changed. 2799 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2800 #ifdef ASSERT 2801 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2802 #endif 2803 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2804 if (Universe::narrow_oop_base() != NULL) { 2805 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2806 } else { 2807 _lsl(dst, src, Universe::narrow_oop_shift()); 2808 } 2809 verify_oop(dst); 2810 } 2811 2812 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 2813 assert(UseCompressedClassPointers, "should only be used for compressed header"); 2814 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2815 int klass_index = oop_recorder()->find_index(k); 2816 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 2817 2818 // Relocation with special format (see relocInfo_arm.hpp). 2819 relocate(rspec); 2820 narrowKlass encoded_k = Klass::encode_klass(k); 2821 movz(dst, encoded_k & 0xffff, 0); 2822 movk(dst, (encoded_k >> 16) & 0xffff, 16); 2823 } 2824 2825 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 2826 assert(UseCompressedOops, "should only be used for compressed header"); 2827 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2828 int oop_index = oop_recorder()->find_index(obj); 2829 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2830 2831 relocate(rspec); 2832 movz(dst, 0xffff, 0); 2833 movk(dst, 0xffff, 16); 2834 } 2835 2836 #endif // COMPILER2 2837 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2838 // must be changed. 2839 void MacroAssembler::encode_klass_not_null(Register r) { 2840 if (Universe::narrow_klass_base() != NULL) { 2841 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2842 assert(r != Rheap_base, "Encoding a klass in Rheap_base"); 2843 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2844 sub(r, r, Rheap_base); 2845 } 2846 if (Universe::narrow_klass_shift() != 0) { 2847 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2848 _lsr(r, r, Universe::narrow_klass_shift()); 2849 } 2850 if (Universe::narrow_klass_base() != NULL) { 2851 reinit_heapbase(); 2852 } 2853 } 2854 2855 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2856 // must be changed. 
2857 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 2858 if (dst == src) { 2859 encode_klass_not_null(src); 2860 return; 2861 } 2862 if (Universe::narrow_klass_base() != NULL) { 2863 mov_slow(dst, (int64_t)Universe::narrow_klass_base()); 2864 sub(dst, src, dst); 2865 if (Universe::narrow_klass_shift() != 0) { 2866 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2867 _lsr(dst, dst, Universe::narrow_klass_shift()); 2868 } 2869 } else { 2870 if (Universe::narrow_klass_shift() != 0) { 2871 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2872 _lsr(dst, src, Universe::narrow_klass_shift()); 2873 } else { 2874 mov(dst, src); 2875 } 2876 } 2877 } 2878 2879 // Function instr_count_for_decode_klass_not_null() counts the instructions 2880 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 2881 // when (Universe::heap() != NULL). Hence, if the instructions they 2882 // generate change, then this method needs to be updated. 2883 int MacroAssembler::instr_count_for_decode_klass_not_null() { 2884 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 2885 assert(Universe::heap() != NULL, "java heap should be initialized"); 2886 if (Universe::narrow_klass_base() != NULL) { 2887 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 2888 1 + // add 2889 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 2890 } else { 2891 if (Universe::narrow_klass_shift() != 0) { 2892 return 1; 2893 } 2894 } 2895 return 0; 2896 } 2897 2898 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2899 // must be changed. 2900 void MacroAssembler::decode_klass_not_null(Register r) { 2901 int off = offset(); 2902 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2903 assert(Universe::heap() != NULL, "java heap should be initialized"); 2904 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 2905 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 2906 // Also do not verify_oop as this is called by verify_oop. 2907 if (Universe::narrow_klass_base() != NULL) { 2908 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2909 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2910 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); 2911 reinit_heapbase(); 2912 } else { 2913 if (Universe::narrow_klass_shift() != 0) { 2914 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2915 _lsl(r, r, Universe::narrow_klass_shift()); 2916 } 2917 } 2918 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); 2919 } 2920 2921 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2922 // must be changed. 2923 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 2924 if (src == dst) { 2925 decode_klass_not_null(src); 2926 return; 2927 } 2928 2929 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2930 assert(Universe::heap() != NULL, "java heap should be initialized"); 2931 assert(src != Rheap_base, "Decoding a klass in Rheap_base"); 2932 assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); 2933 // Also do not verify_oop as this is called by verify_oop. 
2934 if (Universe::narrow_klass_base() != NULL) { 2935 mov_slow(dst, Universe::narrow_klass_base()); 2936 add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); 2937 } else { 2938 _lsl(dst, src, Universe::narrow_klass_shift()); 2939 } 2940 } 2941 2942 2943 void MacroAssembler::reinit_heapbase() { 2944 if (UseCompressedOops || UseCompressedClassPointers) { 2945 if (Universe::heap() != NULL) { 2946 mov_slow(Rheap_base, Universe::narrow_ptrs_base()); 2947 } else { 2948 ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); 2949 } 2950 } 2951 } 2952 2953 #ifdef ASSERT 2954 void MacroAssembler::verify_heapbase(const char* msg) { 2955 // This code pattern is matched in NativeIntruction::skip_verify_heapbase. 2956 // Update it at modifications. 2957 assert (UseCompressedOops, "should be compressed"); 2958 assert (Universe::heap() != NULL, "java heap should be initialized"); 2959 if (CheckCompressedOops) { 2960 Label ok; 2961 str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2962 raw_push(Rtemp, ZR); 2963 mrs(Rtemp, Assembler::SysReg_NZCV); 2964 str(Rtemp, Address(SP, 1 * wordSize)); 2965 mov_slow(Rtemp, Universe::narrow_ptrs_base()); 2966 cmp(Rheap_base, Rtemp); 2967 b(ok, eq); 2968 stop(msg); 2969 bind(ok); 2970 ldr(Rtemp, Address(SP, 1 * wordSize)); 2971 msr(Assembler::SysReg_NZCV, Rtemp); 2972 raw_pop(Rtemp, ZR); 2973 str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2974 } 2975 } 2976 #endif // ASSERT 2977 2978 #endif // AARCH64 2979 2980 #ifdef COMPILER2 2981 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 2982 { 2983 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 2984 2985 Register Rmark = Rscratch2; 2986 2987 assert(Roop != Rscratch, ""); 2988 assert(Roop != Rmark, ""); 2989 assert(Rbox != Rscratch, ""); 2990 assert(Rbox != Rmark, ""); 2991 2992 Label fast_lock, done; 2993 2994 if (UseBiasedLocking && !UseOptoBiasInlining) { 2995 Label failed; 2996 #ifdef AARCH64 2997 biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); 2998 #else 2999 biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); 3000 #endif 3001 bind(failed); 3002 } 3003 3004 ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); 3005 tst(Rmark, markOopDesc::unlocked_value); 3006 b(fast_lock, ne); 3007 3008 // Check for recursive lock 3009 // See comments in InterpreterMacroAssembler::lock_object for 3010 // explanations on the fast recursive locking check. 
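  // In outline (a sketch of the check emitted below; see the reference above
  // for the full reasoning): the lock is a recursive stack lock iff the mark
  // read above is a pointer into our own stack frame, i.e.
  //   (Rmark & 3) == 0  &&  0 <= (Rmark - SP) < os::vm_page_size()
  // AARCH64 folds both tests into a single masked subtraction; ARM32 tests
  // the low bits first and evaluates the page-range condition only under eq.
  // Either way Rscratch ends up zero exactly in the recursive case, and that
  // value is what gets stored as the displaced header.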
3011 #ifdef AARCH64 3012 intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); 3013 Assembler::LogicalImmediate imm(mask, false); 3014 mov(Rscratch, SP); 3015 sub(Rscratch, Rmark, Rscratch); 3016 ands(Rscratch, Rscratch, imm); 3017 // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) 3018 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3019 b(done); 3020 3021 #else 3022 // -1- test low 2 bits 3023 movs(Rscratch, AsmOperand(Rmark, lsl, 30)); 3024 // -2- test (hdr - SP) if the low two bits are 0 3025 sub(Rscratch, Rmark, SP, eq); 3026 movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); 3027 // If still 'eq' then recursive locking OK 3028 // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) 3029 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3030 b(done); 3031 #endif 3032 3033 bind(fast_lock); 3034 str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3035 3036 bool allow_fallthrough_on_failure = true; 3037 bool one_shot = true; 3038 cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3039 3040 bind(done); 3041 3042 } 3043 3044 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3045 { 3046 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3047 3048 Register Rmark = Rscratch2; 3049 3050 assert(Roop != Rscratch, ""); 3051 assert(Roop != Rmark, ""); 3052 assert(Rbox != Rscratch, ""); 3053 assert(Rbox != Rmark, ""); 3054 3055 Label done; 3056 3057 if (UseBiasedLocking && !UseOptoBiasInlining) { 3058 biased_locking_exit(Roop, Rscratch, done); 3059 } 3060 3061 ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3062 // If hdr is NULL, we've got recursive locking and there's nothing more to do 3063 cmp(Rmark, 0); 3064 b(done, eq); 3065 3066 // Restore the object header 3067 bool allow_fallthrough_on_failure = true; 3068 bool one_shot = true; 3069 cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3070 3071 bind(done); 3072 3073 } 3074 #endif // COMPILER2