/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be in their own section,
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use relocInfo::none so that we get a
    // literal address which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
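
// For illustration only (hypothetical usage, not code from this file): an
// AddressLiteral wrapping an external symbol picks up the matching relocation
// spec via set_rspec above, keeping the embedded address patchable when the
// code blob is relocated:
//
//   AddressLiteral top((address)Universe::heap()->top_addr(),
//                      relocInfo::external_word_type);
//   // top.rspec() now describes an external_word_Relocation for that address.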

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// This was subsequently modified to its present name and return type.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key would have to hold the compressed super_klass
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
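
  // The post-indexed load below is, in effect (illustrative C sketch, not
  // code from this file):
  //   cmp_temp = *scan_temp++;  // fetch one Klass* and advance the cursor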
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Restore the saved register before returning success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif // AARCH64
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so
  // that other entries are valid when SP is set.

  // However, this is probably not a strong constraint, since for instance PC
  // is sometimes read from the stack at SP... but is pushed later (by the
  // call). Hence, we now write the fields in the expected order but we have
  // not added a StoreStore barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.
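
  // In pseudocode, the anchor is published as (illustrative only, field names
  // are approximate):
  //   thread->last_Java_fp = fp;  // optional
  //   thread->last_Java_pc = pc;  // optional
  //   thread->last_Java_sp = sp;  // intended to be last: stack walkers treat
  //                               // a set sp as "anchor valid"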

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
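
  // Worked example (illustrative, assuming StackAlignmentInBytes == 8):
  // with SP == 0x1000, the sub gives 0x0ffc and the bic rounds down to
  // 0x0ff8, so R9 is stored at 0x0ff8, strictly below the saved SP (0x1000),
  // and the new SP stays aligned.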
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used by the compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used by the compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non-relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns the number of instructions of the mov_slow pattern,
// generating it if a non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.
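
  // Worked example (illustrative): c = 0x0000123400005678 has two 16-bit
  // parts that differ from the default (zero) filler, so the pattern is
  //   movz rd, #0x5678, lsl #0
  //   movk rd, #0x1234, lsl #32
  // and the helper returns 2.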

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}

// Counts instructions generated by mov_slow(rd, c).
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}

#endif // AARCH64

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false, "TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: uxtb(rd, rn); break;
    case 16: uxth(rd, rn); break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    // rd_lo == rn_hi and rd_hi == rn_lo: swap the halves with the three-eor trick
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64
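
// Worked example (illustrative): long_shift(lo, hi, lo, hi, lsl, 40) takes
// the count >= 32 branch above and emits
//   mov hi, lo, lsl #8   // 40 - 32
//   mov lo, #0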

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);                              // oop to verify
  }
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);                               // oop to verify
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::c2bool(Register x) {
  tst(x, 0xff); // Only look at the lowest byte
#ifdef AARCH64
  cset(x, ne);
#else
  mov(x, 1, ne);
#endif
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts the address of the allocated object into register `obj` and the end
// of the allocated object into register `obj_end`.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts the address of the allocated object into register `obj` and the end
// of the allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

// Fills the memory region [start..end) with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
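  // Illustrative trace: for a 24-byte region, size == 16 after the first
  // subs; one loop pass stores two words and leaves size == 0 (eq), so
  // 'remaining' falls through to the final str that zeroes the last word.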
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  // Borrow the Rthread for alloc counter
  Register Ralloc = Rthread;
  add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
  ldr(tmp, Address(Ralloc));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Ralloc), cc); // store only if the low word did not overflow
  b(done, cc);

  // Increment the high word and store the pair single-copy atomically.
  // (That is an unlikely scenario on typical embedded systems, as it means
  // more than 4GB has been allocated.)
  // To do so, ldrd/strd instructions are used, which require an even-odd
  // pair of registers. Such a constraint could be difficult to satisfy by
  // allocating those registers at a higher level, therefore the routine
  // allocates a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Ralloc));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Ralloc));

  pop(RegisterSet(low, high));

  bind(done);

  // Unborrow the Rthread
  sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}
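
// Illustrative trace for arm_stack_overflow_check(int, Register) above
// (assuming 4KB pages): frame_size_in_bytes == 8192 probes tmp-0xff0 and
// tmp-0x1fe0 on ARM32; the 0xff0 step, slightly less than a page, keeps the
// offset encodable as an immediate while still touching every page of the frame.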

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg);                     // message
  mov(R1, SP);                               // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);                    // message
  ldr_literal(LR, Lwarn);                   // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}
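
// Typical usage (illustrative sketch, not code from this file): reserve a
// fixed-size, patchable window and let the destructor pad with nops:
//   { FixedSizeCodeBlock fixed(this, 2 /* instructions */, true);
//     ldr(rd, Address(PC));  // one instruction emitted...
//   }                        // ...destructor appends one nop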

#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64; this parameter is provided solely for
// better compatibility with the 32-bit ARM version.
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions).
// preserve_flags takes a longer path in the LoadStore case (dmb rather than a
// control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in the LoadStore case only, used to create
// a dependency between the load operation and the conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: reordering of speculative stores is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}

#endif // AARCH64

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter do.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{
  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);         // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{
  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp); // save flags around the increment

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp); // restore flags

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT


// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1898 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1899                                                    Register tmp, Label& slow_case, int* counter_addr) {
1900
1901   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1902 #ifdef ASSERT
1903   breakpoint(ne); // Fallthrough only on success
1904 #endif
1905 #ifndef PRODUCT
1906   if (counter_addr != NULL) {
1907     cond_atomic_inc32(al, counter_addr);
1908   }
1909 #endif // !PRODUCT
1910 }
1911
1912 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1913                                          bool swap_reg_contains_mark,
1914                                          Register tmp2,
1915                                          Label& done, Label& slow_case,
1916                                          BiasedLockingCounters* counters) {
1917   // obj_reg must be preserved (at least) if biased locking fails
1918   // tmp_reg is a temporary register
1919   // swap_reg was used as a temporary but contained a value
1920   //   that was used afterwards in some call paths. Callers
1921   //   have been fixed so that swap_reg no longer needs to be
1922   //   saved.
1923   // Rtemp is no longer scratched
1924
1925   assert(UseBiasedLocking, "why call this otherwise?");
1926   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1927   guarantee(swap_reg != tmp_reg, "invariant");
1928   assert(tmp_reg != noreg, "must supply tmp_reg");
1929
1930 #ifndef PRODUCT
1931   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1932     counters = BiasedLocking::counters();
1933   }
1934 #endif
1935
1936   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1937   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1938
1939   // Biased locking
1940   // See whether the lock is currently biased toward our thread and
1941   // whether the epoch is still valid
1942   // Note that the runtime guarantees sufficient alignment of JavaThread
1943   // pointers to allow age to be placed into low bits
1944   // First check to see whether biasing is even enabled for this object
1945   Label cas_label;
1946
1947   // The null check applies to the mark loading, if we need to load it.
1948   // If the mark has already been loaded in swap_reg then the null check
1949   // has already been performed and the offset is irrelevant.
1950   int null_check_offset = offset();
1951   if (!swap_reg_contains_mark) {
1952     ldr(swap_reg, mark_addr);
1953   }
1954
1955   // On MP platforms loads could return 'stale' values in some cases.
1956   // That is acceptable since either the CAS or the slow-case path is taken in the worst case.
1957
1958   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1959   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1960
1961   b(cas_label, ne);
1962
1963   // The bias pattern is present in the object's header. Need to check
1964   // whether the bias owner and the epoch are both still current.
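  // A sketch of the check that follows (register contents illustrative):
  //   tmp_reg = (prototype_header | Rthread) ^ mark_word
  //   tmp_reg &= ~age_mask_in_place   ; age bits may legitimately differ
  // A zero result means both the bias owner and the epoch match.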
1965   load_klass(tmp_reg, obj_reg);
1966   ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
1967   orr(tmp_reg, tmp_reg, Rthread);
1968   eor(tmp_reg, tmp_reg, swap_reg);
1969
1970 #ifdef AARCH64
1971   ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
1972 #else
1973   bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
1974 #endif // AARCH64
1975
1976 #ifndef PRODUCT
1977   if (counters != NULL) {
1978     cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
1979   }
1980 #endif // !PRODUCT
1981
1982   b(done, eq);
1983
1984   Label try_revoke_bias;
1985   Label try_rebias;
1986
1987   // At this point we know that the header has the bias pattern and
1988   // that we are not the bias owner in the current epoch. We need to
1989   // figure out more details about the state of the header in order to
1990   // know what operations can be legally performed on the object's
1991   // header.
1992
1993   // If the low three bits in the xor result aren't clear, that means
1994   // the prototype header is no longer biased and we have to revoke
1995   // the bias on this object.
1996   tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1997   b(try_revoke_bias, ne);
1998
1999   // Biasing is still enabled for this data type. See whether the
2000   // epoch of the current bias is still valid, meaning that the epoch
2001   // bits of the mark word are equal to the epoch bits of the
2002   // prototype header. (Note that the prototype header's epoch bits
2003   // only change at a safepoint.) If not, attempt to rebias the object
2004   // toward the current thread. Note that we must be absolutely sure
2005   // that the current epoch is invalid in order to do this because
2006   // otherwise the manipulations it performs on the mark word are
2007   // illegal.
2008   tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
2009   b(try_rebias, ne);
2010
2011   // tmp_reg has the age, epoch and pattern bits cleared
2012   // The remaining (owner) bits are (Thread ^ current_owner)
2013
2014   // The epoch of the current bias is still valid but we know nothing
2015   // about the owner; it might be set or it might be clear. Try to
2016   // acquire the bias of the object using an atomic operation. If this
2017   // fails we will go in to the runtime to revoke the object's bias.
2018   // Note that we first construct the presumed unbiased header so we
2019   // don't accidentally blow away another thread's valid bias.
2020
2021   // Note that we know the owner is not ourself. Hence, success can
2022   // only happen when the owner bits are 0
2023
2024 #ifdef AARCH64
2025   // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate,
2026   // as it has a cleared bit in the middle (the cms bit), so it is loaded with a separate instruction.
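  // (Assumed 64-bit mark word layout: lock, biased_lock, age, cms_free,
  //  epoch from the low bit up; the combined mask skips the cms_free bit,
  //  so its set bits are not one contiguous run and the value cannot be
  //  encoded as an AArch64 logical immediate.)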
2027 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2028 andr(swap_reg, swap_reg, tmp2); 2029 #else 2030 // until the assembler can be made smarter, we need to make some assumptions about the values 2031 // so we can optimize this: 2032 assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); 2033 2034 mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); 2035 mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) 2036 #endif // AARCH64 2037 2038 orr(tmp_reg, swap_reg, Rthread); // new mark 2039 2040 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2041 (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); 2042 2043 // If the biasing toward our thread failed, this means that 2044 // another thread succeeded in biasing it toward itself and we 2045 // need to revoke that bias. The revocation will occur in the 2046 // interpreter runtime in the slow case. 2047 2048 b(done); 2049 2050 bind(try_rebias); 2051 2052 // At this point we know the epoch has expired, meaning that the 2053 // current "bias owner", if any, is actually invalid. Under these 2054 // circumstances _only_, we are allowed to use the current header's 2055 // value as the comparison value when doing the cas to acquire the 2056 // bias in the current epoch. In other words, we allow transfer of 2057 // the bias from one thread to another directly in this situation. 2058 2059 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2060 2061 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2062 2063 // owner bits 'random'. Set them to Rthread. 2064 #ifdef AARCH64 2065 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2066 andr(tmp_reg, tmp_reg, tmp2); 2067 #else 2068 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2069 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2070 #endif // AARCH64 2071 2072 orr(tmp_reg, tmp_reg, Rthread); // new mark 2073 2074 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2075 (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); 2076 2077 // If the biasing toward our thread failed, then another thread 2078 // succeeded in biasing it toward itself and we need to revoke that 2079 // bias. The revocation will occur in the runtime in the slow case. 2080 2081 b(done); 2082 2083 bind(try_revoke_bias); 2084 2085 // The prototype mark in the klass doesn't have the bias bit set any 2086 // more, indicating that objects of this data type are not supposed 2087 // to be biased any more. We are going to try to reset the mark of 2088 // this object to the prototype value and fall through to the 2089 // CAS-based locking scheme. Note that if our CAS fails, it means 2090 // that another thread raced us for the privilege of revoking the 2091 // bias of this particular object, so it's okay to continue in the 2092 // normal locking code. 2093 2094 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2095 2096 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2097 2098 // owner bits 'random'. 
Clear them 2099 #ifdef AARCH64 2100 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2101 andr(tmp_reg, tmp_reg, tmp2); 2102 #else 2103 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2104 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2105 #endif // AARCH64 2106 2107 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, 2108 (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); 2109 2110 // Fall through to the normal CAS-based lock, because no matter what 2111 // the result of the above CAS, some thread must have succeeded in 2112 // removing the bias bit from the object's header. 2113 2114 bind(cas_label); 2115 2116 return null_check_offset; 2117 } 2118 2119 2120 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { 2121 assert(UseBiasedLocking, "why call this otherwise?"); 2122 2123 // Check for biased locking unlock case, which is a no-op 2124 // Note: we do not have to check the thread ID for two reasons. 2125 // First, the interpreter checks for IllegalMonitorStateException at 2126 // a higher level. Second, if the bias was revoked while we held the 2127 // lock, the object could not be rebiased toward another thread, so 2128 // the bias bit would be clear. 2129 ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2130 2131 andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2132 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2133 b(done, eq); 2134 } 2135 2136 2137 void MacroAssembler::resolve_jobject(Register value, 2138 Register tmp1, 2139 Register tmp2) { 2140 assert_different_registers(value, tmp1, tmp2); 2141 Label done, not_weak; 2142 cbz(value, done); // Use NULL as-is. 2143 STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); 2144 tbz(value, 0, not_weak); // Test for jweak tag. 2145 2146 // Resolve jweak. 2147 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, 2148 Address(value, -JNIHandles::weak_tag_value), value, tmp1, tmp2, noreg); 2149 b(done); 2150 bind(not_weak); 2151 // Resolve (untagged) jobject. 2152 access_load_at(T_OBJECT, IN_NATIVE, 2153 Address(value, 0), value, tmp1, tmp2, noreg); 2154 verify_oop(value); 2155 bind(done); 2156 } 2157 2158 2159 ////////////////////////////////////////////////////////////////////////////////// 2160 2161 #ifdef AARCH64 2162 2163 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2164 switch (size_in_bytes) { 2165 case 8: ldr(dst, src); break; 2166 case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; 2167 case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; 2168 case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; 2169 default: ShouldNotReachHere(); 2170 } 2171 } 2172 2173 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 2174 switch (size_in_bytes) { 2175 case 8: str(src, dst); break; 2176 case 4: str_32(src, dst); break; 2177 case 2: strh(src, dst); break; 2178 case 1: strb(src, dst); break; 2179 default: ShouldNotReachHere(); 2180 } 2181 } 2182 2183 #else 2184 2185 void MacroAssembler::load_sized_value(Register dst, Address src, 2186 size_t size_in_bytes, bool is_signed, AsmCondition cond) { 2187 switch (size_in_bytes) { 2188 case 4: ldr(dst, src, cond); break; 2189 case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; 2190 case 1: is_signed ? 
ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; 2191 default: ShouldNotReachHere(); 2192 } 2193 } 2194 2195 2196 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { 2197 switch (size_in_bytes) { 2198 case 4: str(src, dst, cond); break; 2199 case 2: strh(src, dst, cond); break; 2200 case 1: strb(src, dst, cond); break; 2201 default: ShouldNotReachHere(); 2202 } 2203 } 2204 #endif // AARCH64 2205 2206 // Look up the method for a megamorphic invokeinterface call. 2207 // The target method is determined by <Rinterf, Rindex>. 2208 // The receiver klass is in Rklass. 2209 // On success, the result will be in method_result, and execution falls through. 2210 // On failure, execution transfers to the given label. 2211 void MacroAssembler::lookup_interface_method(Register Rklass, 2212 Register Rintf, 2213 RegisterOrConstant itable_index, 2214 Register method_result, 2215 Register Rscan, 2216 Register Rtmp, 2217 Label& L_no_such_interface) { 2218 2219 assert_different_registers(Rklass, Rintf, Rscan, Rtmp); 2220 2221 const int entry_size = itableOffsetEntry::size() * HeapWordSize; 2222 assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience"); 2223 2224 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 2225 const int base = in_bytes(Klass::vtable_start_offset()); 2226 const int scale = exact_log2(vtableEntry::size_in_bytes()); 2227 ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable 2228 add(Rscan, Rklass, base); 2229 add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale)); 2230 2231 // Search through the itable for an interface equal to incoming Rintf 2232 // itable looks like [intface][offset][intface][offset][intface][offset] 2233 2234 Label loop; 2235 bind(loop); 2236 ldr(Rtmp, Address(Rscan, entry_size, post_indexed)); 2237 #ifdef AARCH64 2238 Label found; 2239 cmp(Rtmp, Rintf); 2240 b(found, eq); 2241 cbnz(Rtmp, loop); 2242 #else 2243 cmp(Rtmp, Rintf); // set ZF and CF if interface is found 2244 cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is 2245 b(loop, ne); 2246 #endif // AARCH64 2247 2248 #ifdef AARCH64 2249 b(L_no_such_interface); 2250 bind(found); 2251 #else 2252 // CF == 0 means we reached the end of itable without finding icklass 2253 b(L_no_such_interface, cc); 2254 #endif // !AARCH64 2255 2256 if (method_result != noreg) { 2257 // Interface found at previous position of Rscan, now load the method 2258 ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size)); 2259 if (itable_index.is_register()) { 2260 add(Rtmp, Rtmp, Rklass); // Add offset to Klass* 2261 assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); 2262 assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); 2263 ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register())); 2264 } else { 2265 int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() + 2266 itableMethodEntry::method_offset_in_bytes(); 2267 add_slow(method_result, Rklass, method_offset); 2268 ldr(method_result, Address(method_result, Rtmp)); 2269 } 2270 } 2271 } 2272 2273 #ifdef COMPILER2 2274 // TODO: 8 bytes at a time? pre-fetch? 2275 // Compare char[] arrays aligned to 4 bytes. 
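// On entry 'limit' holds the length in bytes (2 * number of chars) and must
// be non-zero; on inequality 'result' is set to 0 and control goes to Ldone.
// On the all-equal fall-through the caller is expected to emit the final
// 'mov(result, 1)' itself (see the note at the end of this method).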
2276 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2277                                         Register limit, Register result,
2278                                         Register chr1, Register chr2, Label& Ldone) {
2279   Label Lvector, Lloop;
2280
2281   // Note: limit contains number of bytes (2*char_elements) != 0.
2282   tst(limit, 0x2); // trailing character ?
2283   b(Lvector, eq);
2284
2285   // compare the trailing char
2286   sub(limit, limit, sizeof(jchar));
2287   ldrh(chr1, Address(ary1, limit));
2288   ldrh(chr2, Address(ary2, limit));
2289   cmp(chr1, chr2);
2290   mov(result, 0, ne);     // not equal
2291   b(Ldone, ne);
2292
2293   // only one char ?
2294   tst(limit, limit);
2295   mov(result, 1, eq);
2296   b(Ldone, eq);
2297
2298   // word by word compare, don't need alignment check
2299   bind(Lvector);
2300
2301   // Shift ary1 and ary2 to the end of the arrays, negate limit
2302   add(ary1, limit, ary1);
2303   add(ary2, limit, ary2);
2304   neg(limit, limit);
2305
2306   bind(Lloop);
2307   ldr_u32(chr1, Address(ary1, limit));
2308   ldr_u32(chr2, Address(ary2, limit));
2309   cmp_32(chr1, chr2);
2310   mov(result, 0, ne);     // not equal
2311   b(Ldone, ne);
2312   adds(limit, limit, 2*sizeof(jchar));
2313   b(Lloop, ne);
2314
2315   // Caller should set it:
2316   // mov(result_reg, 1);  //equal
2317 }
2318 #endif
2319
2320 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2321   mov_slow(tmpreg1, counter_addr);
2322   ldr_s32(tmpreg2, tmpreg1);
2323   add_32(tmpreg2, tmpreg2, 1);
2324   str_32(tmpreg2, tmpreg1);
2325 }
2326
2327 void MacroAssembler::floating_cmp(Register dst) {
2328 #ifdef AARCH64
2329   NOT_TESTED();
2330   cset(dst, gt);            // 1 if '>', else 0
2331   csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2332 #else
2333   vmrs(dst, FPSCR);
2334   orr(dst, dst, 0x08000000);
2335   eor(dst, dst, AsmOperand(dst, lsl, 3));
2336   mov(dst, AsmOperand(dst, asr, 30));
2337 #endif
2338 }
2339
2340 void MacroAssembler::restore_default_fp_mode() {
2341 #ifdef AARCH64
2342   msr(SysReg_FPCR, ZR);
2343 #else
2344 #ifndef __SOFTFP__
2345   // Round to Near mode, IEEE compatible, masked exceptions
2346   mov(Rtemp, 0);
2347   vmsr(FPSCR, Rtemp);
2348 #endif // !__SOFTFP__
2349 #endif // AARCH64
2350 }
2351
2352 #ifndef AARCH64
2353 // 24-bit word range == 26-bit byte range
2354 bool check26(int offset) {
2355   // this could be simplified, but it mimics encoding and decoding
2356   // an actual branch instruction
2357   int off1 = offset << 6 >> 8;
2358   int encoded = off1 & ((1<<24)-1);
2359   int decoded = encoded << 8 >> 6;
2360   return offset == decoded;
2361 }
2362 #endif // !AARCH64
2363
2364 // Perform some slight adjustments so the default 32MB code cache
2365 // is fully reachable.
2366 static inline address first_cache_address() {
2367   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2368 }
2369 static inline address last_cache_address() {
2370   return CodeCache::high_bound() - Assembler::InstructionSize;
2371 }
2372
2373 #ifdef AARCH64
2374 // Can we reach target using ADRP?
2375 bool MacroAssembler::page_reachable_from_cache(address target) {
2376   intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2377   intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2378   intptr_t addr = (intptr_t)target & ~0xfff;
2379
2380   intptr_t loffset = addr - cl;
2381   intptr_t hoffset = addr - ch;
2382   return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2383 }
2384 #endif
2385
2386 // Can we reach target using unconditional branch or call from anywhere
2387 // in the code cache (because code can be relocated)?
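// (On ARM a B/BL instruction encodes a signed 24-bit word offset, i.e.
//  roughly a +/-32MB byte range; the '- 8' below accounts for the PC
//  reading two instructions ahead of the branch on ARM32.)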
2388 bool MacroAssembler::_reachable_from_cache(address target) {
2389 #ifdef __thumb__
2390   if ((1 & (intptr_t)target) != 0) {
2391     // Return false to avoid 'b' if we need switching to THUMB mode.
2392     return false;
2393   }
2394 #endif
2395
2396   address cl = first_cache_address();
2397   address ch = last_cache_address();
2398
2399   if (ForceUnreachable) {
2400     // Only addresses from CodeCache can be treated as reachable.
2401     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2402       return false;
2403     }
2404   }
2405
2406   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2407   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2408
2409 #ifdef AARCH64
2410   return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2411 #else
2412   return check26(loffset - 8) && check26(hoffset - 8);
2413 #endif
2414 }
2415
2416 bool MacroAssembler::reachable_from_cache(address target) {
2417   assert(CodeCache::contains(pc()), "not supported");
2418   return _reachable_from_cache(target);
2419 }
2420
2421 // Can we reach the entire code cache from anywhere else in the code cache?
2422 bool MacroAssembler::_cache_fully_reachable() {
2423   address cl = first_cache_address();
2424   address ch = last_cache_address();
2425   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2426 }
2427
2428 bool MacroAssembler::cache_fully_reachable() {
2429   assert(CodeCache::contains(pc()), "not supported");
2430   return _cache_fully_reachable();
2431 }
2432
2433 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2434   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2435   if (reachable_from_cache(target)) {
2436     relocate(rtype);
2437     b(target NOT_AARCH64_ARG(cond));
2438     return;
2439   }
2440
2441   // Note: relocate is not needed for the code below,
2442   // since it encodes targets in absolute format.
2443   if (ignore_non_patchable_relocations()) {
2444     rtype = relocInfo::none;
2445   }
2446
2447 #ifdef AARCH64
2448   assert (scratch != noreg, "should be specified");
2449   InlinedAddress address_literal(target, rtype);
2450   ldr_literal(scratch, address_literal);
2451   br(scratch);
2452   int off = offset();
2453   bind_literal(address_literal);
2454 #ifdef COMPILER2
2455   if (offset() - off == wordSize) {
2456     // no padding, so insert nop for worst-case sizing
2457     nop();
2458   }
2459 #endif
2460 #else
2461   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2462     // Note: this version cannot be (atomically) patched
2463     mov_slow(scratch, (intptr_t)target, cond);
2464     bx(scratch, cond);
2465   } else {
2466     Label skip;
2467     InlinedAddress address_literal(target);
2468     if (cond != al) {
2469       b(skip, inverse(cond));
2470     }
2471     relocate(rtype);
2472     ldr_literal(PC, address_literal);
2473     bind_literal(address_literal);
2474     bind(skip);
2475   }
2476 #endif // AARCH64
2477 }
2478
2479 // Similar to jump except that:
2480 // - near calls are valid only if any destination in the cache is near
2481 // - no movt/movw (not atomically patchable)
2482 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2483   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2484   if (cache_fully_reachable()) {
2485     // Note: this assumes that all possible targets (the initial one
2486     // and the addresses patched to) are all in the code cache.
2487 assert(CodeCache::contains(target), "target might be too far"); 2488 relocate(rtype); 2489 b(target NOT_AARCH64_ARG(cond)); 2490 return; 2491 } 2492 2493 // Discard the relocation information if not needed for CacheCompiledCode 2494 // since the next encodings are all in absolute format. 2495 if (ignore_non_patchable_relocations()) { 2496 rtype = relocInfo::none; 2497 } 2498 2499 #ifdef AARCH64 2500 assert (scratch != noreg, "should be specified"); 2501 InlinedAddress address_literal(target); 2502 relocate(rtype); 2503 ldr_literal(scratch, address_literal); 2504 br(scratch); 2505 int off = offset(); 2506 bind_literal(address_literal); 2507 #ifdef COMPILER2 2508 if (offset() - off == wordSize) { 2509 // no padding, so insert nop for worst-case sizing 2510 nop(); 2511 } 2512 #endif 2513 #else 2514 { 2515 Label skip; 2516 InlinedAddress address_literal(target); 2517 if (cond != al) { 2518 b(skip, inverse(cond)); 2519 } 2520 relocate(rtype); 2521 ldr_literal(PC, address_literal); 2522 bind_literal(address_literal); 2523 bind(skip); 2524 } 2525 #endif // AARCH64 2526 } 2527 2528 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { 2529 Register scratch = LR; 2530 assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); 2531 if (reachable_from_cache(target)) { 2532 relocate(rspec); 2533 bl(target NOT_AARCH64_ARG(cond)); 2534 return; 2535 } 2536 2537 // Note: relocate is not needed for the code below, 2538 // encoding targets in absolute format. 2539 if (ignore_non_patchable_relocations()) { 2540 // This assumes the information was needed only for relocating the code. 2541 rspec = RelocationHolder::none; 2542 } 2543 2544 #ifndef AARCH64 2545 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { 2546 // Note: this version cannot be (atomically) patched 2547 mov_slow(scratch, (intptr_t)target, cond); 2548 blx(scratch, cond); 2549 return; 2550 } 2551 #endif 2552 2553 { 2554 Label ret_addr; 2555 #ifndef AARCH64 2556 if (cond != al) { 2557 b(ret_addr, inverse(cond)); 2558 } 2559 #endif 2560 2561 2562 #ifdef AARCH64 2563 // TODO-AARCH64: make more optimal implementation 2564 // [ Keep in sync with MacroAssembler::call_size ] 2565 assert(rspec.type() == relocInfo::none, "call reloc not implemented"); 2566 mov_slow(scratch, target); 2567 blr(scratch); 2568 #else 2569 InlinedAddress address_literal(target); 2570 relocate(rspec); 2571 adr(LR, ret_addr); 2572 ldr_literal(PC, address_literal); 2573 2574 bind_literal(address_literal); 2575 bind(ret_addr); 2576 #endif 2577 } 2578 } 2579 2580 #if defined(AARCH64) && defined(COMPILER2) 2581 int MacroAssembler::call_size(address target, bool far, bool patchable) { 2582 // FIXME: mov_slow is variable-length 2583 if (!far) return 1; // bl 2584 if (patchable) return 2; // ldr; blr 2585 return instr_count_for_mov_slow((intptr_t)target) + 1; 2586 } 2587 #endif 2588 2589 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { 2590 assert(rspec.type() == relocInfo::static_call_type || 2591 rspec.type() == relocInfo::none || 2592 rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); 2593 2594 // Always generate the relocation information, needed for patching 2595 relocate(rspec); // used by NativeCall::is_call_before() 2596 if (cache_fully_reachable()) { 2597 // Note: this assumes that all possible targets (the initial one 2598 // and the addresses patched to) are all in the code cache. 
2599 assert(CodeCache::contains(target), "target might be too far"); 2600 bl(target); 2601 } else { 2602 #if defined(AARCH64) && defined(COMPILER2) 2603 if (c2) { 2604 // return address needs to match call_size(). 2605 // no need to trash Rtemp 2606 int off = offset(); 2607 Label skip_literal; 2608 InlinedAddress address_literal(target); 2609 ldr_literal(LR, address_literal); 2610 blr(LR); 2611 int ret_addr_offset = offset(); 2612 assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); 2613 b(skip_literal); 2614 int off2 = offset(); 2615 bind_literal(address_literal); 2616 if (offset() - off2 == wordSize) { 2617 // no padding, so insert nop for worst-case sizing 2618 nop(); 2619 } 2620 bind(skip_literal); 2621 return ret_addr_offset; 2622 } 2623 #endif 2624 Label ret_addr; 2625 InlinedAddress address_literal(target); 2626 #ifdef AARCH64 2627 ldr_literal(Rtemp, address_literal); 2628 adr(LR, ret_addr); 2629 br(Rtemp); 2630 #else 2631 adr(LR, ret_addr); 2632 ldr_literal(PC, address_literal); 2633 #endif 2634 bind_literal(address_literal); 2635 bind(ret_addr); 2636 } 2637 return offset(); 2638 } 2639 2640 // ((OopHandle)result).resolve(); 2641 void MacroAssembler::resolve_oop_handle(Register result) { 2642 // OopHandle::resolve is an indirection. 2643 ldr(result, Address(result, 0)); 2644 } 2645 2646 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { 2647 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2648 ldr(tmp, Address(method, Method::const_offset())); 2649 ldr(tmp, Address(tmp, ConstMethod::constants_offset())); 2650 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); 2651 ldr(mirror, Address(tmp, mirror_offset)); 2652 resolve_oop_handle(mirror); 2653 } 2654 2655 2656 /////////////////////////////////////////////////////////////////////////////// 2657 2658 // Compressed pointers 2659 2660 #ifdef AARCH64 2661 2662 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { 2663 if (UseCompressedClassPointers) { 2664 ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2665 decode_klass_not_null(dst_klass); 2666 } else { 2667 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2668 } 2669 } 2670 2671 #else 2672 2673 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { 2674 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); 2675 } 2676 2677 #endif // AARCH64 2678 2679 // Blows src_klass. 2680 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { 2681 #ifdef AARCH64 2682 if (UseCompressedClassPointers) { 2683 assert(src_klass != dst_oop, "not enough registers"); 2684 encode_klass_not_null(src_klass); 2685 str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2686 return; 2687 } 2688 #endif // AARCH64 2689 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2690 } 2691 2692 #ifdef AARCH64 2693 2694 void MacroAssembler::store_klass_gap(Register dst) { 2695 if (UseCompressedClassPointers) { 2696 str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2697 } 2698 } 2699 2700 #endif // AARCH64 2701 2702 2703 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { 2704 access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3); 2705 } 2706 2707 // Blows src and flags. 
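// A hedged usage sketch of these heap-oop accessors (register choices and
// the field offset are illustrative, not prescribed):
//   __ load_heap_oop(Rdst, Address(Robj, offset), Rtmp1, Rtmp2, Rtmp3);
//   __ store_heap_oop(Address(Robj, offset), Rval, Rtmp1, Rtmp2, Rtmp3); // blows Rval and flags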
2708 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2709   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
2710 }
2711
2712 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
2713   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
2714 }
2715
2716 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
2717                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
2718   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2719   decorators = AccessInternal::decorator_fixup(decorators);
2720   bool as_raw = (decorators & AS_RAW) != 0;
2721   if (as_raw) {
2722     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2723   } else {
2724     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
2725   }
2726 }
2727
2728 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
2729                                     Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
2730   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2731   decorators = AccessInternal::decorator_fixup(decorators);
2732   bool as_raw = (decorators & AS_RAW) != 0;
2733   if (as_raw) {
2734     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2735   } else {
2736     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
2737   }
2738 }
2739
2740
2741 #ifdef AARCH64
2742
2743 // Algorithm must match oop.inline.hpp encode_heap_oop.
2744 void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2745   // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2746   // Update it at modifications.
2747   assert (UseCompressedOops, "must be compressed");
2748   assert (Universe::heap() != NULL, "java heap should be initialized");
2749 #ifdef ASSERT
2750   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2751 #endif
2752   verify_oop(src);
2753   if (Universe::narrow_oop_base() == NULL) {
2754     if (Universe::narrow_oop_shift() != 0) {
2755       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2756       _lsr(dst, src, Universe::narrow_oop_shift());
2757     } else if (dst != src) {
2758       mov(dst, src);
2759     }
2760   } else {
2761     tst(src, src);
2762     csel(dst, Rheap_base, src, eq);
2763     sub(dst, dst, Rheap_base);
2764     if (Universe::narrow_oop_shift() != 0) {
2765       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2766       _lsr(dst, dst, Universe::narrow_oop_shift());
2767     }
2768   }
2769 }
2770
2771 // Same algorithm as oop.inline.hpp decode_heap_oop.
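// A sketch of the decode sequence below when a heap base is set:
//   tst  src, src                          ; narrow oop NULL?
//   add  dst, Rheap_base, src, lsl #shift  ; base + (narrow << shift)
//   csel dst, dst, ZR, ne                  ; map narrow NULL back to NULL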
2772 void MacroAssembler::decode_heap_oop(Register dst, Register src) {
2773 #ifdef ASSERT
2774   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2775 #endif
2776   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2777   if (Universe::narrow_oop_base() != NULL) {
2778     tst(src, src);
2779     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2780     csel(dst, dst, ZR, ne);
2781   } else {
2782     _lsl(dst, src, Universe::narrow_oop_shift());
2783   }
2784   verify_oop(dst);
2785 }
2786
2787 #ifdef COMPILER2
2788 // Algorithm must match oop.inline.hpp encode_heap_oop.
2789 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule
2790 // must be changed.
2791 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
2792   assert (UseCompressedOops, "must be compressed");
2793   assert (Universe::heap() != NULL, "java heap should be initialized");
2794 #ifdef ASSERT
2795   verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2796 #endif
2797   verify_oop(src);
2798   if (Universe::narrow_oop_base() == NULL) {
2799     if (Universe::narrow_oop_shift() != 0) {
2800       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2801       _lsr(dst, src, Universe::narrow_oop_shift());
2802     } else if (dst != src) {
2803       mov(dst, src);
2804     }
2805   } else {
2806     sub(dst, src, Rheap_base);
2807     if (Universe::narrow_oop_shift() != 0) {
2808       assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2809       _lsr(dst, dst, Universe::narrow_oop_shift());
2810     }
2811   }
2812 }
2813
2814 // Same algorithm as oop.inline.hpp decode_heap_oop.
2815 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule
2816 // must be changed.
2817 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
2818 #ifdef ASSERT
2819   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
2820 #endif
2821   assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
2822   if (Universe::narrow_oop_base() != NULL) {
2823     add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
2824   } else {
2825     _lsl(dst, src, Universe::narrow_oop_shift());
2826   }
2827   verify_oop(dst);
2828 }
2829
2830 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2831   assert(UseCompressedClassPointers, "should only be used for compressed header");
2832   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2833   int klass_index = oop_recorder()->find_index(k);
2834   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
2835
2836   // Relocation with special format (see relocInfo_arm.hpp).
2837   relocate(rspec);
2838   narrowKlass encoded_k = Klass::encode_klass(k);
2839   movz(dst, encoded_k & 0xffff, 0);
2840   movk(dst, (encoded_k >> 16) & 0xffff, 16);
2841 }
2842
2843 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2844   assert(UseCompressedOops, "should only be used for compressed header");
2845   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
2846   int oop_index = oop_recorder()->find_index(obj);
2847   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2848
2849   relocate(rspec);
2850   movz(dst, 0xffff, 0);
2851   movk(dst, 0xffff, 16);
2852 }
2853
2854 #endif // COMPILER2
2855 // Must preserve condition codes, or C2 encodeKlass_not_null rule
2856 // must be changed.
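// (The sequences below achieve this by using only non-flag-setting
//  instructions: mov_slow, sub, and plain shifts.)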
2857 void MacroAssembler::encode_klass_not_null(Register r) { 2858 if (Universe::narrow_klass_base() != NULL) { 2859 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2860 assert(r != Rheap_base, "Encoding a klass in Rheap_base"); 2861 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2862 sub(r, r, Rheap_base); 2863 } 2864 if (Universe::narrow_klass_shift() != 0) { 2865 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2866 _lsr(r, r, Universe::narrow_klass_shift()); 2867 } 2868 if (Universe::narrow_klass_base() != NULL) { 2869 reinit_heapbase(); 2870 } 2871 } 2872 2873 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2874 // must be changed. 2875 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 2876 if (dst == src) { 2877 encode_klass_not_null(src); 2878 return; 2879 } 2880 if (Universe::narrow_klass_base() != NULL) { 2881 mov_slow(dst, (int64_t)Universe::narrow_klass_base()); 2882 sub(dst, src, dst); 2883 if (Universe::narrow_klass_shift() != 0) { 2884 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2885 _lsr(dst, dst, Universe::narrow_klass_shift()); 2886 } 2887 } else { 2888 if (Universe::narrow_klass_shift() != 0) { 2889 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2890 _lsr(dst, src, Universe::narrow_klass_shift()); 2891 } else { 2892 mov(dst, src); 2893 } 2894 } 2895 } 2896 2897 // Function instr_count_for_decode_klass_not_null() counts the instructions 2898 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 2899 // when (Universe::heap() != NULL). Hence, if the instructions they 2900 // generate change, then this method needs to be updated. 2901 int MacroAssembler::instr_count_for_decode_klass_not_null() { 2902 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 2903 assert(Universe::heap() != NULL, "java heap should be initialized"); 2904 if (Universe::narrow_klass_base() != NULL) { 2905 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 2906 1 + // add 2907 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 2908 } else { 2909 if (Universe::narrow_klass_shift() != 0) { 2910 return 1; 2911 } 2912 } 2913 return 0; 2914 } 2915 2916 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2917 // must be changed. 2918 void MacroAssembler::decode_klass_not_null(Register r) { 2919 int off = offset(); 2920 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2921 assert(Universe::heap() != NULL, "java heap should be initialized"); 2922 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 2923 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 2924 // Also do not verify_oop as this is called by verify_oop. 2925 if (Universe::narrow_klass_base() != NULL) { 2926 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 
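    // (Rheap_base is restored by the reinit_heapbase() call just below.)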
2927     mov_slow(Rheap_base, Universe::narrow_klass_base());
2928     add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
2929     reinit_heapbase();
2930   } else {
2931     if (Universe::narrow_klass_shift() != 0) {
2932       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
2933       _lsl(r, r, Universe::narrow_klass_shift());
2934     }
2935   }
2936   assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
2937 }
2938
2939 // Must preserve condition codes, or C2 decodeKlass_not_null rule
2940 // must be changed.
2941 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2942   if (src == dst) {
2943     decode_klass_not_null(src);
2944     return;
2945   }
2946
2947   assert(UseCompressedClassPointers, "should only be used for compressed headers");
2948   assert(Universe::heap() != NULL, "java heap should be initialized");
2949   assert(src != Rheap_base, "Decoding a klass in Rheap_base");
2950   assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
2951   // Also do not verify_oop as this is called by verify_oop.
2952   if (Universe::narrow_klass_base() != NULL) {
2953     mov_slow(dst, Universe::narrow_klass_base());
2954     add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
2955   } else {
2956     _lsl(dst, src, Universe::narrow_klass_shift());
2957   }
2958 }
2959
2960
2961 void MacroAssembler::reinit_heapbase() {
2962   if (UseCompressedOops || UseCompressedClassPointers) {
2963     if (Universe::heap() != NULL) {
2964       mov_slow(Rheap_base, Universe::narrow_ptrs_base());
2965     } else {
2966       ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
2967     }
2968   }
2969 }
2970
2971 #ifdef ASSERT
2972 void MacroAssembler::verify_heapbase(const char* msg) {
2973   // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
2974   // Update it at modifications.
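  // (The check below compares Rheap_base with the expected narrow_ptrs_base
  //  and preserves the flags by saving/restoring NZCV around the cmp.)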
2975 assert (UseCompressedOops, "should be compressed"); 2976 assert (Universe::heap() != NULL, "java heap should be initialized"); 2977 if (CheckCompressedOops) { 2978 Label ok; 2979 str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2980 raw_push(Rtemp, ZR); 2981 mrs(Rtemp, Assembler::SysReg_NZCV); 2982 str(Rtemp, Address(SP, 1 * wordSize)); 2983 mov_slow(Rtemp, Universe::narrow_ptrs_base()); 2984 cmp(Rheap_base, Rtemp); 2985 b(ok, eq); 2986 stop(msg); 2987 bind(ok); 2988 ldr(Rtemp, Address(SP, 1 * wordSize)); 2989 msr(Assembler::SysReg_NZCV, Rtemp); 2990 raw_pop(Rtemp, ZR); 2991 str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 2992 } 2993 } 2994 #endif // ASSERT 2995 2996 #endif // AARCH64 2997 2998 #ifdef COMPILER2 2999 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3000 { 3001 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3002 3003 Register Rmark = Rscratch2; 3004 3005 assert(Roop != Rscratch, ""); 3006 assert(Roop != Rmark, ""); 3007 assert(Rbox != Rscratch, ""); 3008 assert(Rbox != Rmark, ""); 3009 3010 Label fast_lock, done; 3011 3012 if (UseBiasedLocking && !UseOptoBiasInlining) { 3013 Label failed; 3014 #ifdef AARCH64 3015 biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); 3016 #else 3017 biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); 3018 #endif 3019 bind(failed); 3020 } 3021 3022 ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); 3023 tst(Rmark, markOopDesc::unlocked_value); 3024 b(fast_lock, ne); 3025 3026 // Check for recursive lock 3027 // See comments in InterpreterMacroAssembler::lock_object for 3028 // explanations on the fast recursive locking check. 
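  // A sketch of the recursive-lock test that follows: the lock is treated
  // as recursive iff (mark - SP) & ~(page_size - 4) == 0, i.e. the mark is
  // a stack address within a page of our SP and its low two bits are clear.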
3029 #ifdef AARCH64
3030   intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
3031   Assembler::LogicalImmediate imm(mask, false);
3032   mov(Rscratch, SP);
3033   sub(Rscratch, Rmark, Rscratch);
3034   ands(Rscratch, Rscratch, imm);
3035   // set to zero if recursive lock, set to non-zero otherwise (see discussion in JDK-8153107)
3036   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3037   b(done);
3038
3039 #else
3040   // -1- test low 2 bits
3041   movs(Rscratch, AsmOperand(Rmark, lsl, 30));
3042   // -2- test (hdr - SP) if the low two bits are 0
3043   sub(Rscratch, Rmark, SP, eq);
3044   movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
3045   // If still 'eq' then recursive locking OK
3046   // set to zero if recursive lock, set to non-zero otherwise (see discussion in JDK-8153107)
3047   str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3048   b(done);
3049 #endif
3050
3051   bind(fast_lock);
3052   str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3053
3054   bool allow_fallthrough_on_failure = true;
3055   bool one_shot = true;
3056   cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3057
3058   bind(done);
3059
3060 }
3061
3062 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
3063 {
3064   assert(VM_Version::supports_ldrex(), "unsupported, yet?");
3065
3066   Register Rmark = Rscratch2;
3067
3068   assert(Roop != Rscratch, "");
3069   assert(Roop != Rmark, "");
3070   assert(Rbox != Rscratch, "");
3071   assert(Rbox != Rmark, "");
3072
3073   Label done;
3074
3075   if (UseBiasedLocking && !UseOptoBiasInlining) {
3076     biased_locking_exit(Roop, Rscratch, done);
3077   }
3078
3079   ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
3080   // If hdr is NULL, we've got recursive locking and there's nothing more to do
3081   cmp(Rmark, 0);
3082   b(done, eq);
3083
3084   // Restore the object header
3085   bool allow_fallthrough_on_failure = true;
3086   bool one_shot = true;
3087   cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
3088
3089   bind(done);
3090
3091 }
3092 #endif // COMPILER2
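// A sketch of how these building blocks pair up at a C2 monitor site
// (illustrative only; the actual shape is dictated by the FastLock /
// FastUnlock ad rules):
//   fast_lock(Roop, Rbox, Rscratch, Rmark);    // 'eq' on fall-through => locked
//   ...                                        // monitor body
//   fast_unlock(Roop, Rbox, Rscratch, Rmark);  // 'eq' on fall-through => unlocked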