1 /* 2 * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "asm/macroAssembler.hpp" 29 #include "ci/ciEnv.hpp" 30 #include "code/nativeInst.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/cardTableModRefBS.hpp" 33 #include "gc/shared/collectedHeap.inline.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "oops/klass.inline.hpp" 37 #include "prims/methodHandles.hpp" 38 #include "runtime/biasedLocking.hpp" 39 #include "runtime/interfaceSupport.hpp" 40 #include "runtime/objectMonitor.hpp" 41 #include "runtime/os.hpp" 42 #include "runtime/sharedRuntime.hpp" 43 #include "runtime/stubRoutines.hpp" 44 #include "utilities/macros.hpp" 45 #if INCLUDE_ALL_GCS 46 #include "gc/g1/g1CollectedHeap.inline.hpp" 47 #include "gc/g1/g1SATBCardTableModRefBS.hpp" 48 #include "gc/g1/heapRegion.hpp" 49 #endif 50 51 // Implementation of AddressLiteral 52 53 void AddressLiteral::set_rspec(relocInfo::relocType rtype) { 54 switch (rtype) { 55 case relocInfo::oop_type: 56 // Oops are a special case. Normally they would be their own section 57 // but in cases like icBuffer they are literals in the code stream that 58 // we don't have a section for. We use none so that we get a literal address 59 // which is always patchable. 60 break; 61 case relocInfo::external_word_type: 62 _rspec = external_word_Relocation::spec(_target); 63 break; 64 case relocInfo::internal_word_type: 65 _rspec = internal_word_Relocation::spec(_target); 66 break; 67 case relocInfo::opt_virtual_call_type: 68 _rspec = opt_virtual_call_Relocation::spec(); 69 break; 70 case relocInfo::static_call_type: 71 _rspec = static_call_Relocation::spec(); 72 break; 73 case relocInfo::runtime_call_type: 74 _rspec = runtime_call_Relocation::spec(); 75 break; 76 case relocInfo::poll_type: 77 case relocInfo::poll_return_type: 78 _rspec = Relocation::spec_simple(rtype); 79 break; 80 case relocInfo::none: 81 break; 82 default: 83 ShouldNotReachHere(); 84 break; 85 } 86 } 87 88 // Initially added to the Assembler interface as a pure virtual: 89 // RegisterConstant delayed_value(..) 
90 // for: 91 // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) 92 // this was subsequently modified to its present name and return type 93 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 94 Register tmp, 95 int offset) { 96 ShouldNotReachHere(); 97 return RegisterOrConstant(-1); 98 } 99 100 101 #ifdef AARCH64 102 // Note: ARM32 version is OS dependent 103 void MacroAssembler::breakpoint(AsmCondition cond) { 104 if (cond == al) { 105 brk(); 106 } else { 107 Label L; 108 b(L, inverse(cond)); 109 brk(); 110 bind(L); 111 } 112 } 113 #endif // AARCH64 114 115 116 // virtual method calling 117 void MacroAssembler::lookup_virtual_method(Register recv_klass, 118 Register vtable_index, 119 Register method_result) { 120 const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes(); 121 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 122 add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); 123 ldr(method_result, Address(recv_klass, base_offset)); 124 } 125 126 127 // Simplified, combined version, good for typical uses. 128 // Falls through on failure. 129 void MacroAssembler::check_klass_subtype(Register sub_klass, 130 Register super_klass, 131 Register temp_reg, 132 Register temp_reg2, 133 Register temp_reg3, 134 Label& L_success) { 135 Label L_failure; 136 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); 137 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); 138 bind(L_failure); 139 }; 140 141 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 142 Register super_klass, 143 Register temp_reg, 144 Register temp_reg2, 145 Label* L_success, 146 Label* L_failure, 147 Label* L_slow_path) { 148 149 assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg); 150 const Register super_check_offset = temp_reg2; 151 152 Label L_fallthrough; 153 int label_nulls = 0; 154 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 155 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 156 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 157 assert(label_nulls <= 1, "at most one NULL in the batch"); 158 159 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 160 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 161 Address super_check_offset_addr(super_klass, sco_offset); 162 163 // If the pointers are equal, we are done (e.g., String[] elements). 164 // This self-check enables sharing of secondary supertype arrays among 165 // non-primary types such as array-of-interface. Otherwise, each such 166 // type would need its own customized SSA. 167 // We move this check to the front of the fast path because many 168 // type checks are in fact trivially successful in this manner, 169 // so we get a nicely predicted branch right at the start of the check. 170 cmp(sub_klass, super_klass); 171 b(*L_success, eq); 172 173 // Check the supertype display: 174 ldr_u32(super_check_offset, super_check_offset_addr); 175 176 Address super_check_addr(sub_klass, super_check_offset); 177 ldr(temp_reg, super_check_addr); 178 cmp(super_klass, temp_reg); // load displayed supertype 179 180 // This check has worked decisively for primary supers. 181 // Secondary supers are sought in the super_cache ('super_cache_addr'). 
182 // (Secondary supers are interfaces and very deeply nested subtypes.) 183 // This works in the same check above because of a tricky aliasing 184 // between the super_cache and the primary super display elements. 185 // (The 'super_check_addr' can address either, as the case requires.) 186 // Note that the cache is updated below if it does not help us find 187 // what we need immediately. 188 // So if it was a primary super, we can just fail immediately. 189 // Otherwise, it's the slow path for us (no success at this point). 190 191 b(*L_success, eq); 192 cmp_32(super_check_offset, sc_offset); 193 if (L_failure == &L_fallthrough) { 194 b(*L_slow_path, eq); 195 } else { 196 b(*L_failure, ne); 197 if (L_slow_path != &L_fallthrough) { 198 b(*L_slow_path); 199 } 200 } 201 202 bind(L_fallthrough); 203 } 204 205 206 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 207 Register super_klass, 208 Register temp_reg, 209 Register temp2_reg, 210 Register temp3_reg, 211 Label* L_success, 212 Label* L_failure, 213 bool set_cond_codes) { 214 #ifdef AARCH64 215 NOT_IMPLEMENTED(); 216 #else 217 // Note: if used by code that expects a register to be 0 on success, 218 // this register must be temp_reg and set_cond_codes must be true 219 220 Register saved_reg = noreg; 221 222 // get additional tmp registers 223 if (temp3_reg == noreg) { 224 saved_reg = temp3_reg = LR; 225 push(saved_reg); 226 } 227 228 assert(temp2_reg != noreg, "need all the temporary registers"); 229 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg); 230 231 Register cmp_temp = temp_reg; 232 Register scan_temp = temp3_reg; 233 Register count_temp = temp2_reg; 234 235 Label L_fallthrough; 236 int label_nulls = 0; 237 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 238 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 239 assert(label_nulls <= 1, "at most one NULL in the batch"); 240 241 // a couple of useful fields in sub_klass: 242 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 243 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 244 Address secondary_supers_addr(sub_klass, ss_offset); 245 Address super_cache_addr( sub_klass, sc_offset); 246 247 #ifndef PRODUCT 248 inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp); 249 #endif 250 251 // We will consult the secondary-super array. 252 ldr(scan_temp, Address(sub_klass, ss_offset)); 253 254 assert(! UseCompressedOops, "search_key must be the compressed super_klass"); 255 // else search_key is the 256 Register search_key = super_klass; 257 258 // Load the array length. 259 ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes())); 260 add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); 261 262 add(count_temp, count_temp, 1); 263 264 Label L_loop, L_setnz_and_fail, L_fail; 265 266 // Top of search loop 267 bind(L_loop); 268 // Notes: 269 // scan_temp starts at the array elements 270 // count_temp is 1+size 271 subs(count_temp, count_temp, 1); 272 if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) { 273 // direct jump to L_failure if failed and no cleanup needed 274 b(*L_failure, eq); // not found and 275 } else { 276 b(L_fail, eq); // not found in the array 277 } 278 279 // Load next super to check 280 // In the array of super classes elements are pointer sized. 
281 int element_size = wordSize; 282 ldr(cmp_temp, Address(scan_temp, element_size, post_indexed)); 283 284 // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list 285 subs(cmp_temp, cmp_temp, search_key); 286 287 // A miss means we are NOT a subtype and need to keep looping 288 b(L_loop, ne); 289 290 // Falling out the bottom means we found a hit; we ARE a subtype 291 292 // Note: temp_reg/cmp_temp is already 0 and flag Z is set 293 294 // Success. Cache the super we found and proceed in triumph. 295 str(super_klass, Address(sub_klass, sc_offset)); 296 297 if (saved_reg != noreg) { 298 // Return success 299 pop(saved_reg); 300 } 301 302 b(*L_success); 303 304 bind(L_fail); 305 // Note1: check "b(*L_failure, eq)" above if adding extra instructions here 306 if (set_cond_codes) { 307 movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed 308 } 309 if (saved_reg != noreg) { 310 pop(saved_reg); 311 } 312 if (L_failure != &L_fallthrough) { 313 b(*L_failure); 314 } 315 316 bind(L_fallthrough); 317 #endif 318 } 319 320 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same. 321 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) { 322 assert_different_registers(params_base, params_count); 323 add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize)); 324 return Address(tmp, -Interpreter::stackElementSize); 325 } 326 327 328 void MacroAssembler::align(int modulus) { 329 while (offset() % modulus != 0) { 330 nop(); 331 } 332 } 333 334 int MacroAssembler::set_last_Java_frame(Register last_java_sp, 335 Register last_java_fp, 336 bool save_last_java_pc, 337 Register tmp) { 338 int pc_offset; 339 if (last_java_fp != noreg) { 340 // optional 341 str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset())); 342 _fp_saved = true; 343 } else { 344 _fp_saved = false; 345 } 346 if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM 347 #ifdef AARCH64 348 pc_offset = mov_pc_to(tmp); 349 str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset())); 350 #else 351 str(PC, Address(Rthread, JavaThread::last_Java_pc_offset())); 352 pc_offset = offset() + VM_Version::stored_pc_adjustment(); 353 #endif 354 _pc_saved = true; 355 } else { 356 _pc_saved = false; 357 pc_offset = -1; 358 } 359 // According to comment in javaFrameAnchorm SP must be saved last, so that other 360 // entries are valid when SP is set. 361 362 // However, this is probably not a strong constrainst since for instance PC is 363 // sometimes read from the stack at SP... but is pushed later (by the call). Hence, 364 // we now write the fields in the expected order but we have not added a StoreStore 365 // barrier. 366 367 // XXX: if the ordering is really important, PC should always be saved (without forgetting 368 // to update oop_map offsets) and a StoreStore barrier might be needed. 
369 370 if (last_java_sp == noreg) { 371 last_java_sp = SP; // always saved 372 } 373 #ifdef AARCH64 374 if (last_java_sp == SP) { 375 mov(tmp, SP); 376 str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); 377 } else { 378 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 379 } 380 #else 381 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 382 #endif 383 384 return pc_offset; // for oopmaps 385 } 386 387 void MacroAssembler::reset_last_Java_frame(Register tmp) { 388 const Register Rzero = zero_register(tmp); 389 str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); 390 if (_fp_saved) { 391 str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset())); 392 } 393 if (_pc_saved) { 394 str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset())); 395 } 396 } 397 398 399 // Implementation of call_VM versions 400 401 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) { 402 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 403 assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); 404 405 #ifndef AARCH64 406 // Safer to save R9 here since callers may have been written 407 // assuming R9 survives. This is suboptimal but is not worth 408 // optimizing for the few platforms where R9 is scratched. 409 push(RegisterSet(R4) | R9ifScratched); 410 mov(R4, SP); 411 bic(SP, SP, StackAlignmentInBytes - 1); 412 #endif // AARCH64 413 call(entry_point, relocInfo::runtime_call_type); 414 #ifndef AARCH64 415 mov(SP, R4); 416 pop(RegisterSet(R4) | R9ifScratched); 417 #endif // AARCH64 418 } 419 420 421 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 422 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 423 assert(number_of_arguments <= 3, "cannot have more than 3 arguments"); 424 425 const Register tmp = Rtemp; 426 assert_different_registers(oop_result, tmp); 427 428 set_last_Java_frame(SP, FP, true, tmp); 429 430 #ifdef ASSERT 431 AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); 432 #endif // ASSERT 433 434 #ifndef AARCH64 435 #if R9_IS_SCRATCHED 436 // Safer to save R9 here since callers may have been written 437 // assuming R9 survives. This is suboptimal but is not worth 438 // optimizing for the few platforms where R9 is scratched. 439 440 // Note: cannot save R9 above the saved SP (some calls expect for 441 // instance the Java stack top at the saved SP) 442 // => once saved (with set_last_Java_frame), decrease SP before rounding to 443 // ensure the slot at SP will be free for R9). 
444 sub(SP, SP, 4); 445 bic(SP, SP, StackAlignmentInBytes - 1); 446 str(R9, Address(SP, 0)); 447 #else 448 bic(SP, SP, StackAlignmentInBytes - 1); 449 #endif // R9_IS_SCRATCHED 450 #endif 451 452 mov(R0, Rthread); 453 call(entry_point, relocInfo::runtime_call_type); 454 455 #ifndef AARCH64 456 #if R9_IS_SCRATCHED 457 ldr(R9, Address(SP, 0)); 458 #endif 459 ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); 460 #endif 461 462 reset_last_Java_frame(tmp); 463 464 // C++ interp handles this in the interpreter 465 check_and_handle_popframe(); 466 check_and_handle_earlyret(); 467 468 if (check_exceptions) { 469 // check for pending exceptions 470 ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); 471 #ifdef AARCH64 472 Label L; 473 cbz(tmp, L); 474 mov_pc_to(Rexception_pc); 475 b(StubRoutines::forward_exception_entry()); 476 bind(L); 477 #else 478 cmp(tmp, 0); 479 mov(Rexception_pc, PC, ne); 480 b(StubRoutines::forward_exception_entry(), ne); 481 #endif // AARCH64 482 } 483 484 // get oop result if there is one and reset the value in the thread 485 if (oop_result->is_valid()) { 486 get_vm_result(oop_result, tmp); 487 } 488 } 489 490 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 491 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 492 } 493 494 495 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 496 assert (arg_1 == R1, "fixed register for arg_1"); 497 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 498 } 499 500 501 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 502 assert (arg_1 == R1, "fixed register for arg_1"); 503 assert (arg_2 == R2, "fixed register for arg_2"); 504 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 505 } 506 507 508 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 509 assert (arg_1 == R1, "fixed register for arg_1"); 510 assert (arg_2 == R2, "fixed register for arg_2"); 511 assert (arg_3 == R3, "fixed register for arg_3"); 512 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 513 } 514 515 516 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { 517 // Not used on ARM 518 Unimplemented(); 519 } 520 521 522 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 523 // Not used on ARM 524 Unimplemented(); 525 } 526 527 528 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 529 // Not used on ARM 530 Unimplemented(); 531 } 532 533 534 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 535 // Not used on ARM 536 Unimplemented(); 537 } 538 539 // Raw call, without saving/restoring registers, exception handling, etc. 540 // Mainly used from various stubs. 
541 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) { 542 const Register tmp = Rtemp; // Rtemp free since scratched by call 543 set_last_Java_frame(SP, FP, true, tmp); 544 #if R9_IS_SCRATCHED 545 if (save_R9_if_scratched) { 546 // Note: Saving also R10 for alignment. 547 push(RegisterSet(R9, R10)); 548 } 549 #endif 550 mov(R0, Rthread); 551 call(entry_point, relocInfo::runtime_call_type); 552 #if R9_IS_SCRATCHED 553 if (save_R9_if_scratched) { 554 pop(RegisterSet(R9, R10)); 555 } 556 #endif 557 reset_last_Java_frame(tmp); 558 } 559 560 void MacroAssembler::call_VM_leaf(address entry_point) { 561 call_VM_leaf_helper(entry_point, 0); 562 } 563 564 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 565 assert (arg_1 == R0, "fixed register for arg_1"); 566 call_VM_leaf_helper(entry_point, 1); 567 } 568 569 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 570 assert (arg_1 == R0, "fixed register for arg_1"); 571 assert (arg_2 == R1, "fixed register for arg_2"); 572 call_VM_leaf_helper(entry_point, 2); 573 } 574 575 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 576 assert (arg_1 == R0, "fixed register for arg_1"); 577 assert (arg_2 == R1, "fixed register for arg_2"); 578 assert (arg_3 == R2, "fixed register for arg_3"); 579 call_VM_leaf_helper(entry_point, 3); 580 } 581 582 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) { 583 assert (arg_1 == R0, "fixed register for arg_1"); 584 assert (arg_2 == R1, "fixed register for arg_2"); 585 assert (arg_3 == R2, "fixed register for arg_3"); 586 assert (arg_4 == R3, "fixed register for arg_4"); 587 call_VM_leaf_helper(entry_point, 4); 588 } 589 590 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) { 591 assert_different_registers(oop_result, tmp); 592 ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); 593 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); 594 verify_oop(oop_result); 595 } 596 597 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) { 598 assert_different_registers(metadata_result, tmp); 599 ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); 600 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); 601 } 602 603 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) { 604 if (arg2.is_register()) { 605 add(dst, arg1, arg2.as_register()); 606 } else { 607 add(dst, arg1, arg2.as_constant()); 608 } 609 } 610 611 void MacroAssembler::add_slow(Register rd, Register rn, int c) { 612 #ifdef AARCH64 613 if (c == 0) { 614 if (rd != rn) { 615 mov(rd, rn); 616 } 617 return; 618 } 619 if (c < 0) { 620 sub_slow(rd, rn, -c); 621 return; 622 } 623 if (c > right_n_bits(24)) { 624 guarantee(rd != rn, "no large add_slow with only one register"); 625 mov_slow(rd, c); 626 add(rd, rn, rd); 627 } else { 628 int lo = c & right_n_bits(12); 629 int hi = (c >> 12) & right_n_bits(12); 630 if (lo != 0) { 631 add(rd, rn, lo, lsl0); 632 } 633 if (hi != 0) { 634 add(rd, (lo == 0) ? 
rn : rd, hi, lsl12); 635 } 636 } 637 #else 638 // This function is used in compiler for handling large frame offsets 639 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 640 return sub(rd, rn, (-c)); 641 } 642 int low = c & 0x3fc; 643 if (low != 0) { 644 add(rd, rn, low); 645 rn = rd; 646 } 647 if (c & ~0x3fc) { 648 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c); 649 add(rd, rn, c & ~0x3fc); 650 } else if (rd != rn) { 651 assert(c == 0, ""); 652 mov(rd, rn); // need to generate at least one move! 653 } 654 #endif // AARCH64 655 } 656 657 void MacroAssembler::sub_slow(Register rd, Register rn, int c) { 658 #ifdef AARCH64 659 if (c <= 0) { 660 add_slow(rd, rn, -c); 661 return; 662 } 663 if (c > right_n_bits(24)) { 664 guarantee(rd != rn, "no large sub_slow with only one register"); 665 mov_slow(rd, c); 666 sub(rd, rn, rd); 667 } else { 668 int lo = c & right_n_bits(12); 669 int hi = (c >> 12) & right_n_bits(12); 670 if (lo != 0) { 671 sub(rd, rn, lo, lsl0); 672 } 673 if (hi != 0) { 674 sub(rd, (lo == 0) ? rn : rd, hi, lsl12); 675 } 676 } 677 #else 678 // This function is used in compiler for handling large frame offsets 679 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 680 return add(rd, rn, (-c)); 681 } 682 int low = c & 0x3fc; 683 if (low != 0) { 684 sub(rd, rn, low); 685 rn = rd; 686 } 687 if (c & ~0x3fc) { 688 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c); 689 sub(rd, rn, c & ~0x3fc); 690 } else if (rd != rn) { 691 assert(c == 0, ""); 692 mov(rd, rn); // need to generate at least one move! 693 } 694 #endif // AARCH64 695 } 696 697 void MacroAssembler::mov_slow(Register rd, address addr) { 698 // do *not* call the non relocated mov_related_address 699 mov_slow(rd, (intptr_t)addr); 700 } 701 702 void MacroAssembler::mov_slow(Register rd, const char *str) { 703 mov_slow(rd, (intptr_t)str); 704 } 705 706 #ifdef AARCH64 707 708 // Common code for mov_slow and instr_count_for_mov_slow. 709 // Returns number of instructions of mov_slow pattern, 710 // generating it if non-null MacroAssembler is given. 711 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) { 712 // This code pattern is matched in NativeIntruction::is_mov_slow. 713 // Update it at modifications. 
714 715 const intx mask = right_n_bits(16); 716 // 1 movz instruction 717 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 718 if ((c & ~(mask << base_shift)) == 0) { 719 if (masm != NULL) { 720 masm->movz(rd, ((uintx)c) >> base_shift, base_shift); 721 } 722 return 1; 723 } 724 } 725 // 1 movn instruction 726 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 727 if (((~c) & ~(mask << base_shift)) == 0) { 728 if (masm != NULL) { 729 masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); 730 } 731 return 1; 732 } 733 } 734 // 1 orr instruction 735 { 736 LogicalImmediate imm(c, false); 737 if (imm.is_encoded()) { 738 if (masm != NULL) { 739 masm->orr(rd, ZR, imm); 740 } 741 return 1; 742 } 743 } 744 // 1 movz/movn + up to 3 movk instructions 745 int zeroes = 0; 746 int ones = 0; 747 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 748 int part = (c >> base_shift) & mask; 749 if (part == 0) { 750 ++zeroes; 751 } else if (part == mask) { 752 ++ones; 753 } 754 } 755 int def_bits = 0; 756 if (ones > zeroes) { 757 def_bits = mask; 758 } 759 int inst_count = 0; 760 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 761 int part = (c >> base_shift) & mask; 762 if (part != def_bits) { 763 if (masm != NULL) { 764 if (inst_count > 0) { 765 masm->movk(rd, part, base_shift); 766 } else { 767 if (def_bits == 0) { 768 masm->movz(rd, part, base_shift); 769 } else { 770 masm->movn(rd, ~part & mask, base_shift); 771 } 772 } 773 } 774 inst_count++; 775 } 776 } 777 assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); 778 return inst_count; 779 } 780 781 void MacroAssembler::mov_slow(Register rd, intptr_t c) { 782 #ifdef ASSERT 783 int off = offset(); 784 #endif 785 (void) mov_slow_helper(rd, c, this); 786 assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); 787 } 788 789 // Counts instructions generated by mov_slow(rd, c). 
790 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { 791 return mov_slow_helper(noreg, c, NULL); 792 } 793 794 int MacroAssembler::instr_count_for_mov_slow(address c) { 795 return mov_slow_helper(noreg, (intptr_t)c, NULL); 796 } 797 798 #else 799 800 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { 801 if (AsmOperand::is_rotated_imm(c)) { 802 mov(rd, c, cond); 803 } else if (AsmOperand::is_rotated_imm(~c)) { 804 mvn(rd, ~c, cond); 805 } else if (VM_Version::supports_movw()) { 806 movw(rd, c & 0xffff, cond); 807 if ((unsigned int)c >> 16) { 808 movt(rd, (unsigned int)c >> 16, cond); 809 } 810 } else { 811 // Find first non-zero bit 812 int shift = 0; 813 while ((c & (3 << shift)) == 0) { 814 shift += 2; 815 } 816 // Put the least significant part of the constant 817 int mask = 0xff << shift; 818 mov(rd, c & mask, cond); 819 // Add up to 3 other parts of the constant; 820 // each of them can be represented as rotated_imm 821 if (c & (mask << 8)) { 822 orr(rd, rd, c & (mask << 8), cond); 823 } 824 if (c & (mask << 16)) { 825 orr(rd, rd, c & (mask << 16), cond); 826 } 827 if (c & (mask << 24)) { 828 orr(rd, rd, c & (mask << 24), cond); 829 } 830 } 831 } 832 833 #endif // AARCH64 834 835 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, 836 #ifdef AARCH64 837 bool patchable 838 #else 839 AsmCondition cond 840 #endif 841 ) { 842 843 if (o == NULL) { 844 #ifdef AARCH64 845 if (patchable) { 846 nop(); 847 } 848 mov(rd, ZR); 849 #else 850 mov(rd, 0, cond); 851 #endif 852 return; 853 } 854 855 if (oop_index == 0) { 856 oop_index = oop_recorder()->allocate_oop_index(o); 857 } 858 relocate(oop_Relocation::spec(oop_index)); 859 860 #ifdef AARCH64 861 if (patchable) { 862 nop(); 863 } 864 ldr(rd, pc()); 865 #else 866 if (VM_Version::supports_movw()) { 867 movw(rd, 0, cond); 868 movt(rd, 0, cond); 869 } else { 870 ldr(rd, Address(PC), cond); 871 // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 872 nop(); 873 } 874 #endif 875 } 876 877 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { 878 if (o == NULL) { 879 #ifdef AARCH64 880 if (patchable) { 881 nop(); 882 } 883 #endif 884 mov(rd, 0); 885 return; 886 } 887 888 if (metadata_index == 0) { 889 metadata_index = oop_recorder()->allocate_metadata_index(o); 890 } 891 relocate(metadata_Relocation::spec(metadata_index)); 892 893 #ifdef AARCH64 894 if (patchable) { 895 nop(); 896 } 897 #ifdef COMPILER2 898 if (!patchable && VM_Version::prefer_moves_over_load_literal()) { 899 mov_slow(rd, (address)o); 900 return; 901 } 902 #endif 903 ldr(rd, pc()); 904 #else 905 if (VM_Version::supports_movw()) { 906 movw(rd, ((int)o) & 0xffff); 907 movt(rd, (unsigned int)o >> 16); 908 } else { 909 ldr(rd, Address(PC)); 910 // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). 
911 nop(); 912 } 913 #endif // AARCH64 914 } 915 916 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { 917 Label skip_constant; 918 union { 919 jfloat f; 920 jint i; 921 } accessor; 922 accessor.f = c; 923 924 #ifdef AARCH64 925 // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow 926 Label L; 927 ldr_s(fd, target(L)); 928 b(skip_constant); 929 bind(L); 930 emit_int32(accessor.i); 931 bind(skip_constant); 932 #else 933 flds(fd, Address(PC), cond); 934 b(skip_constant); 935 emit_int32(accessor.i); 936 bind(skip_constant); 937 #endif // AARCH64 938 } 939 940 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { 941 Label skip_constant; 942 union { 943 jdouble d; 944 jint i[2]; 945 } accessor; 946 accessor.d = c; 947 948 #ifdef AARCH64 949 // TODO-AARCH64 - try to optimize loading of double constants with fmov 950 Label L; 951 ldr_d(fd, target(L)); 952 b(skip_constant); 953 align(wordSize); 954 bind(L); 955 emit_int32(accessor.i[0]); 956 emit_int32(accessor.i[1]); 957 bind(skip_constant); 958 #else 959 fldd(fd, Address(PC), cond); 960 b(skip_constant); 961 emit_int32(accessor.i[0]); 962 emit_int32(accessor.i[1]); 963 bind(skip_constant); 964 #endif // AARCH64 965 } 966 967 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) { 968 intptr_t addr = (intptr_t) address_of_global; 969 #ifdef AARCH64 970 assert((addr & 0x3) == 0, "address should be aligned"); 971 972 // FIXME: TODO 973 if (false && page_reachable_from_cache(address_of_global)) { 974 assert(false,"TODO: relocate"); 975 //relocate(); 976 adrp(reg, address_of_global); 977 ldrsw(reg, Address(reg, addr & 0xfff)); 978 } else { 979 mov_slow(reg, addr & ~0x3fff); 980 ldrsw(reg, Address(reg, addr & 0x3fff)); 981 } 982 #else 983 mov_slow(reg, addr & ~0xfff); 984 ldr(reg, Address(reg, addr & 0xfff)); 985 #endif 986 } 987 988 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) { 989 #ifdef AARCH64 990 intptr_t addr = (intptr_t) address_of_global; 991 assert ((addr & 0x7) == 0, "address should be aligned"); 992 mov_slow(reg, addr & ~0x7fff); 993 ldr(reg, Address(reg, addr & 0x7fff)); 994 #else 995 ldr_global_s32(reg, address_of_global); 996 #endif 997 } 998 999 void MacroAssembler::ldrb_global(Register reg, address address_of_global) { 1000 intptr_t addr = (intptr_t) address_of_global; 1001 mov_slow(reg, addr & ~0xfff); 1002 ldrb(reg, Address(reg, addr & 0xfff)); 1003 } 1004 1005 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) { 1006 #ifdef AARCH64 1007 switch (bits) { 1008 case 8: uxtb(rd, rn); break; 1009 case 16: uxth(rd, rn); break; 1010 case 32: mov_w(rd, rn); break; 1011 default: ShouldNotReachHere(); 1012 } 1013 #else 1014 if (bits <= 8) { 1015 andr(rd, rn, (1 << bits) - 1); 1016 } else if (bits >= 24) { 1017 bic(rd, rn, -1 << bits); 1018 } else { 1019 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1020 mov(rd, AsmOperand(rd, lsr, 32 - bits)); 1021 } 1022 #endif 1023 } 1024 1025 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) { 1026 #ifdef AARCH64 1027 switch (bits) { 1028 case 8: sxtb(rd, rn); break; 1029 case 16: sxth(rd, rn); break; 1030 case 32: sxtw(rd, rn); break; 1031 default: ShouldNotReachHere(); 1032 } 1033 #else 1034 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1035 mov(rd, AsmOperand(rd, asr, 32 - bits)); 1036 #endif 1037 } 1038 1039 #ifndef AARCH64 1040 1041 void MacroAssembler::long_move(Register rd_lo, Register rd_hi, 
1042 Register rn_lo, Register rn_hi, 1043 AsmCondition cond) { 1044 if (rd_lo != rn_hi) { 1045 if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } 1046 if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } 1047 } else if (rd_hi != rn_lo) { 1048 if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); } 1049 if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); } 1050 } else { 1051 eor(rd_lo, rd_hi, rd_lo, cond); 1052 eor(rd_hi, rd_lo, rd_hi, cond); 1053 eor(rd_lo, rd_hi, rd_lo, cond); 1054 } 1055 } 1056 1057 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, 1058 Register rn_lo, Register rn_hi, 1059 AsmShift shift, Register count) { 1060 Register tmp; 1061 if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) { 1062 tmp = rd_lo; 1063 } else { 1064 tmp = rd_hi; 1065 } 1066 assert_different_registers(tmp, count, rn_lo, rn_hi); 1067 1068 subs(tmp, count, 32); 1069 if (shift == lsl) { 1070 assert_different_registers(rd_hi, rn_lo); 1071 assert_different_registers(count, rd_hi); 1072 mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl); 1073 rsb(tmp, count, 32, mi); 1074 if (rd_hi == rn_hi) { 1075 mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi); 1076 orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); 1077 } else { 1078 mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi); 1079 orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi); 1080 } 1081 mov(rd_lo, AsmOperand(rn_lo, shift, count)); 1082 } else { 1083 assert_different_registers(rd_lo, rn_hi); 1084 assert_different_registers(rd_lo, count); 1085 mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl); 1086 rsb(tmp, count, 32, mi); 1087 if (rd_lo == rn_lo) { 1088 mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi); 1089 orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); 1090 } else { 1091 mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi); 1092 orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi); 1093 } 1094 mov(rd_hi, AsmOperand(rn_hi, shift, count)); 1095 } 1096 } 1097 1098 void MacroAssembler::long_shift(Register rd_lo, Register rd_hi, 1099 Register rn_lo, Register rn_hi, 1100 AsmShift shift, int count) { 1101 assert(count != 0 && (count & ~63) == 0, "must be"); 1102 1103 if (shift == lsl) { 1104 assert_different_registers(rd_hi, rn_lo); 1105 if (count >= 32) { 1106 mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32)); 1107 mov(rd_lo, 0); 1108 } else { 1109 mov(rd_hi, AsmOperand(rn_hi, lsl, count)); 1110 orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count)); 1111 mov(rd_lo, AsmOperand(rn_lo, lsl, count)); 1112 } 1113 } else { 1114 assert_different_registers(rd_lo, rn_hi); 1115 if (count >= 32) { 1116 if (count == 32) { 1117 mov(rd_lo, rn_hi); 1118 } else { 1119 mov(rd_lo, AsmOperand(rn_hi, shift, count - 32)); 1120 } 1121 if (shift == asr) { 1122 mov(rd_hi, AsmOperand(rn_hi, asr, 0)); 1123 } else { 1124 mov(rd_hi, 0); 1125 } 1126 } else { 1127 mov(rd_lo, AsmOperand(rn_lo, lsr, count)); 1128 orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count)); 1129 mov(rd_hi, AsmOperand(rn_hi, shift, count)); 1130 } 1131 } 1132 } 1133 #endif // !AARCH64 1134 1135 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { 1136 // This code pattern is matched in NativeIntruction::skip_verify_oop. 1137 // Update it at modifications. 
1138 if (!VerifyOops) return; 1139 1140 char buffer[64]; 1141 #ifdef COMPILER1 1142 if (CommentedAssembly) { 1143 snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); 1144 block_comment(buffer); 1145 } 1146 #endif 1147 const char* msg_buffer = NULL; 1148 { 1149 ResourceMark rm; 1150 stringStream ss; 1151 ss.print("%s at offset %d (%s:%d)", s, offset(), file, line); 1152 msg_buffer = code_string(ss.as_string()); 1153 } 1154 1155 save_all_registers(); 1156 1157 if (reg != R2) { 1158 mov(R2, reg); // oop to verify 1159 } 1160 mov(R1, SP); // register save area 1161 1162 Label done; 1163 InlinedString Lmsg(msg_buffer); 1164 ldr_literal(R0, Lmsg); // message 1165 1166 // call indirectly to solve generation ordering problem 1167 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1168 call(Rtemp); 1169 1170 restore_all_registers(); 1171 1172 b(done); 1173 #ifdef COMPILER2 1174 int off = offset(); 1175 #endif 1176 bind_literal(Lmsg); 1177 #ifdef COMPILER2 1178 if (offset() - off == 1 * wordSize) { 1179 // no padding, so insert nop for worst-case sizing 1180 nop(); 1181 } 1182 #endif 1183 bind(done); 1184 } 1185 1186 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 1187 if (!VerifyOops) return; 1188 1189 const char* msg_buffer = NULL; 1190 { 1191 ResourceMark rm; 1192 stringStream ss; 1193 if ((addr.base() == SP) && (addr.index()==noreg)) { 1194 ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s); 1195 } else { 1196 ss.print("verify_oop_addr: %s", s); 1197 } 1198 ss.print(" (%s:%d)", file, line); 1199 msg_buffer = code_string(ss.as_string()); 1200 } 1201 1202 int push_size = save_all_registers(); 1203 1204 if (addr.base() == SP) { 1205 // computes an addr that takes into account the push 1206 if (addr.index() != noreg) { 1207 Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index 1208 add(new_base, SP, push_size); 1209 addr = addr.rebase(new_base); 1210 } else { 1211 addr = addr.plus_disp(push_size); 1212 } 1213 } 1214 1215 ldr(R2, addr); // oop to verify 1216 mov(R1, SP); // register save area 1217 1218 Label done; 1219 InlinedString Lmsg(msg_buffer); 1220 ldr_literal(R0, Lmsg); // message 1221 1222 // call indirectly to solve generation ordering problem 1223 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1224 call(Rtemp); 1225 1226 restore_all_registers(); 1227 1228 b(done); 1229 bind_literal(Lmsg); 1230 bind(done); 1231 } 1232 1233 void MacroAssembler::null_check(Register reg, Register tmp, int offset) { 1234 if (needs_explicit_null_check(offset)) { 1235 #ifdef AARCH64 1236 ldr(ZR, Address(reg)); 1237 #else 1238 assert_different_registers(reg, tmp); 1239 if (tmp == noreg) { 1240 tmp = Rtemp; 1241 assert((! Thread::current()->is_Compiler_thread()) || 1242 (! (ciEnv::current()->task() == NULL)) || 1243 (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), 1244 "Rtemp not available in C2"); // explicit tmp register required 1245 // XXX: could we mark the code buffer as not compatible with C2 ? 1246 } 1247 ldr(tmp, Address(reg)); 1248 #endif 1249 } 1250 } 1251 1252 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
1253 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, 1254 RegisterOrConstant size_expression, Label& slow_case) { 1255 if (!Universe::heap()->supports_inline_contig_alloc()) { 1256 b(slow_case); 1257 return; 1258 } 1259 1260 CollectedHeap* ch = Universe::heap(); 1261 1262 const Register top_addr = tmp1; 1263 const Register heap_end = tmp2; 1264 1265 if (size_expression.is_register()) { 1266 assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); 1267 } else { 1268 assert_different_registers(obj, obj_end, top_addr, heap_end); 1269 } 1270 1271 bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance 1272 if (load_const) { 1273 mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); 1274 } else { 1275 ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); 1276 } 1277 // Calculate new heap_top by adding the size of the object 1278 Label retry; 1279 bind(retry); 1280 1281 #ifdef AARCH64 1282 ldxr(obj, top_addr); 1283 #else 1284 ldr(obj, Address(top_addr)); 1285 #endif // AARCH64 1286 1287 ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); 1288 add_rc(obj_end, obj, size_expression); 1289 // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. 1290 cmp(obj_end, obj); 1291 b(slow_case, lo); 1292 // Update heap_top if allocation succeeded 1293 cmp(obj_end, heap_end); 1294 b(slow_case, hi); 1295 1296 #ifdef AARCH64 1297 stxr(heap_end/*scratched*/, obj_end, top_addr); 1298 cbnz_w(heap_end, retry); 1299 #else 1300 atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); 1301 b(retry, ne); 1302 #endif // AARCH64 1303 } 1304 1305 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
1306 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, 1307 RegisterOrConstant size_expression, Label& slow_case) { 1308 const Register tlab_end = tmp1; 1309 assert_different_registers(obj, obj_end, tlab_end); 1310 1311 ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); 1312 ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); 1313 add_rc(obj_end, obj, size_expression); 1314 cmp(obj_end, tlab_end); 1315 b(slow_case, hi); 1316 str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); 1317 } 1318 1319 void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2, 1320 Register tmp3, Register tmp4, 1321 Label& try_eden, Label& slow_case) { 1322 if (!Universe::heap()->supports_inline_contig_alloc()) { 1323 b(slow_case); 1324 return; 1325 } 1326 1327 InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr()); 1328 Label discard_tlab, do_refill; 1329 ldr(top, Address(Rthread, JavaThread::tlab_top_offset())); 1330 ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); 1331 ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); 1332 1333 // Calculate amount of free space 1334 sub(tmp1, tmp1, top); 1335 // Retain tlab and allocate in shared space 1336 // if the amount of free space in tlab is too large to discard 1337 cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize)); 1338 b(discard_tlab, ge); 1339 1340 // Increment waste limit to prevent getting stuck on this slow path 1341 mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment()); 1342 add(tmp2, tmp2, tmp3); 1343 str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset())); 1344 if (TLABStats) { 1345 ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); 1346 add_32(tmp2, tmp2, 1); 1347 str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset())); 1348 } 1349 b(try_eden); 1350 bind_literal(intArrayKlass_addr); 1351 1352 bind(discard_tlab); 1353 if (TLABStats) { 1354 ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); 1355 ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset())); 1356 add_32(tmp2, tmp2, 1); 1357 add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize)); 1358 str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset())); 1359 str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset())); 1360 } 1361 // If tlab is currently allocated (top or end != null) 1362 // then fill [top, end + alignment_reserve) with array object 1363 cbz(top, do_refill); 1364 1365 // Set up the mark word 1366 mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 1367 str(tmp2, Address(top, oopDesc::mark_offset_in_bytes())); 1368 // Set klass to intArrayKlass and the length to the remaining space 1369 ldr_literal(tmp2, intArrayKlass_addr); 1370 add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() - 1371 typeArrayOopDesc::header_size(T_INT) * HeapWordSize); 1372 Register klass = tmp2; 1373 ldr(klass, Address(tmp2)); 1374 logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint) 1375 str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes())); 1376 store_klass(klass, top); // blows klass: 1377 klass = noreg; 1378 1379 ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset())); 1380 sub(tmp1, top, tmp1); // size of tlab's allocated portion 1381 incr_allocated_bytes(tmp1, tmp2); 1382 1383 bind(do_refill); 1384 // Refill the tlab with an eden allocation 1385 
ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset())); 1386 logical_shift_left(tmp4, tmp1, LogHeapWordSize); 1387 eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case); 1388 str(top, Address(Rthread, JavaThread::tlab_start_offset())); 1389 str(top, Address(Rthread, JavaThread::tlab_top_offset())); 1390 1391 #ifdef ASSERT 1392 // Verify that tmp1 contains tlab_end 1393 ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset())); 1394 add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize)); 1395 cmp(tmp1, tmp2); 1396 breakpoint(ne); 1397 #endif 1398 1399 sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 1400 str(tmp1, Address(Rthread, JavaThread::tlab_end_offset())); 1401 1402 if (ZeroTLAB) { 1403 // clobbers start and tmp 1404 // top must be preserved! 1405 add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 1406 ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset())); 1407 zero_memory(tmp2, tmp1, tmp3); 1408 } 1409 } 1410 1411 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. 1412 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { 1413 Label loop; 1414 const Register ptr = start; 1415 1416 #ifdef AARCH64 1417 // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x 1418 const Register size = tmp; 1419 Label remaining, done; 1420 1421 sub(size, end, start); 1422 1423 #ifdef ASSERT 1424 { Label L; 1425 tst(size, wordSize - 1); 1426 b(L, eq); 1427 stop("size is not a multiple of wordSize"); 1428 bind(L); 1429 } 1430 #endif // ASSERT 1431 1432 subs(size, size, wordSize); 1433 b(remaining, le); 1434 1435 // Zero by 2 words per iteration. 1436 bind(loop); 1437 subs(size, size, 2*wordSize); 1438 stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); 1439 b(loop, gt); 1440 1441 bind(remaining); 1442 b(done, ne); 1443 str(ZR, Address(ptr)); 1444 bind(done); 1445 #else 1446 mov(tmp, 0); 1447 bind(loop); 1448 cmp(ptr, end); 1449 str(tmp, Address(ptr, wordSize, post_indexed), lo); 1450 b(loop, lo); 1451 #endif // AARCH64 1452 } 1453 1454 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { 1455 #ifdef AARCH64 1456 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1457 add_rc(tmp, tmp, size_in_bytes); 1458 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1459 #else 1460 // Bump total bytes allocated by this thread 1461 Label done; 1462 1463 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1464 adds(tmp, tmp, size_in_bytes); 1465 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc); 1466 b(done, cc); 1467 1468 // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated) 1469 // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by 1470 // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself. 
1471 Register low, high; 1472 // Select ether R0/R1 or R2/R3 1473 1474 if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { 1475 low = R2; 1476 high = R3; 1477 } else { 1478 low = R0; 1479 high = R1; 1480 } 1481 push(RegisterSet(low, high)); 1482 1483 ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1484 adds(low, low, size_in_bytes); 1485 adc(high, high, 0); 1486 strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1487 1488 pop(RegisterSet(low, high)); 1489 1490 bind(done); 1491 #endif // AARCH64 1492 } 1493 1494 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { 1495 // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM 1496 if (UseStackBanging) { 1497 const int page_size = os::vm_page_size(); 1498 1499 sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); 1500 strb(R0, Address(tmp)); 1501 #ifdef AARCH64 1502 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { 1503 sub(tmp, tmp, page_size); 1504 strb(R0, Address(tmp)); 1505 } 1506 #else 1507 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { 1508 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1509 } 1510 #endif // AARCH64 1511 } 1512 } 1513 1514 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) { 1515 if (UseStackBanging) { 1516 Label loop; 1517 1518 mov(tmp, SP); 1519 add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); 1520 #ifdef AARCH64 1521 sub(tmp, tmp, Rsize); 1522 bind(loop); 1523 subs(Rsize, Rsize, os::vm_page_size()); 1524 strb(ZR, Address(tmp, Rsize)); 1525 #else 1526 bind(loop); 1527 subs(Rsize, Rsize, 0xff0); 1528 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1529 #endif // AARCH64 1530 b(loop, hi); 1531 } 1532 } 1533 1534 void MacroAssembler::stop(const char* msg) { 1535 // This code pattern is matched in NativeIntruction::is_stop. 1536 // Update it at modifications. 
1537 #ifdef COMPILER1 1538 if (CommentedAssembly) { 1539 block_comment("stop"); 1540 } 1541 #endif 1542 1543 InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); 1544 InlinedString Lmsg(msg); 1545 1546 // save all registers for further inspection 1547 save_all_registers(); 1548 1549 ldr_literal(R0, Lmsg); // message 1550 mov(R1, SP); // register save area 1551 1552 #ifdef AARCH64 1553 ldr_literal(Rtemp, Ldebug); 1554 br(Rtemp); 1555 #else 1556 ldr_literal(PC, Ldebug); // call MacroAssembler::debug 1557 #endif // AARCH64 1558 1559 #if defined(COMPILER2) && defined(AARCH64) 1560 int off = offset(); 1561 #endif 1562 bind_literal(Lmsg); 1563 bind_literal(Ldebug); 1564 #if defined(COMPILER2) && defined(AARCH64) 1565 if (offset() - off == 2 * wordSize) { 1566 // no padding, so insert nop for worst-case sizing 1567 nop(); 1568 } 1569 #endif 1570 } 1571 1572 void MacroAssembler::warn(const char* msg) { 1573 #ifdef COMPILER1 1574 if (CommentedAssembly) { 1575 block_comment("warn"); 1576 } 1577 #endif 1578 1579 InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning)); 1580 InlinedString Lmsg(msg); 1581 Label done; 1582 1583 int push_size = save_caller_save_registers(); 1584 1585 #ifdef AARCH64 1586 // TODO-AARCH64 - get rid of extra debug parameters 1587 mov(R1, LR); 1588 mov(R2, FP); 1589 add(R3, SP, push_size); 1590 #endif 1591 1592 ldr_literal(R0, Lmsg); // message 1593 ldr_literal(LR, Lwarn); // call warning 1594 1595 call(LR); 1596 1597 restore_caller_save_registers(); 1598 1599 b(done); 1600 bind_literal(Lmsg); 1601 bind_literal(Lwarn); 1602 bind(done); 1603 } 1604 1605 1606 int MacroAssembler::save_all_registers() { 1607 // This code pattern is matched in NativeIntruction::is_save_all_registers. 1608 // Update it at modifications. 1609 #ifdef AARCH64 1610 const Register tmp = Rtemp; 1611 raw_push(R30, ZR); 1612 for (int i = 28; i >= 0; i -= 2) { 1613 raw_push(as_Register(i), as_Register(i+1)); 1614 } 1615 mov_pc_to(tmp); 1616 str(tmp, Address(SP, 31*wordSize)); 1617 ldr(tmp, Address(SP, tmp->encoding()*wordSize)); 1618 return 32*wordSize; 1619 #else 1620 push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); 1621 return 15*wordSize; 1622 #endif // AARCH64 1623 } 1624 1625 void MacroAssembler::restore_all_registers() { 1626 #ifdef AARCH64 1627 for (int i = 0; i <= 28; i += 2) { 1628 raw_pop(as_Register(i), as_Register(i+1)); 1629 } 1630 raw_pop(R30, ZR); 1631 #else 1632 pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers 1633 add(SP, SP, wordSize); // discard saved PC 1634 #endif // AARCH64 1635 } 1636 1637 int MacroAssembler::save_caller_save_registers() { 1638 #ifdef AARCH64 1639 for (int i = 0; i <= 16; i += 2) { 1640 raw_push(as_Register(i), as_Register(i+1)); 1641 } 1642 raw_push(R18, LR); 1643 return 20*wordSize; 1644 #else 1645 #if R9_IS_SCRATCHED 1646 // Save also R10 to preserve alignment 1647 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1648 return 8*wordSize; 1649 #else 1650 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); 1651 return 6*wordSize; 1652 #endif 1653 #endif // AARCH64 1654 } 1655 1656 void MacroAssembler::restore_caller_save_registers() { 1657 #ifdef AARCH64 1658 raw_pop(R18, LR); 1659 for (int i = 16; i >= 0; i -= 2) { 1660 raw_pop(as_Register(i), as_Register(i+1)); 1661 } 1662 #else 1663 #if R9_IS_SCRATCHED 1664 pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1665 #else 1666 pop(RegisterSet(R0, R3) | RegisterSet(R12) | 
RegisterSet(LR)); 1667 #endif 1668 #endif // AARCH64 1669 } 1670 1671 void MacroAssembler::debug(const char* msg, const intx* registers) { 1672 // In order to get locks to work, we need to fake a in_VM state 1673 JavaThread* thread = JavaThread::current(); 1674 thread->set_thread_state(_thread_in_vm); 1675 1676 if (ShowMessageBoxOnError) { 1677 ttyLocker ttyl; 1678 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 1679 BytecodeCounter::print(); 1680 } 1681 if (os::message_box(msg, "Execution stopped, print registers?")) { 1682 #ifdef AARCH64 1683 // saved registers: R0-R30, PC 1684 const int nregs = 32; 1685 #else 1686 // saved registers: R0-R12, LR, PC 1687 const int nregs = 15; 1688 const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; 1689 #endif // AARCH64 1690 1691 for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { 1692 tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); 1693 } 1694 1695 #ifdef AARCH64 1696 tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); 1697 #endif // AARCH64 1698 1699 // derive original SP value from the address of register save area 1700 tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); 1701 } 1702 BREAKPOINT; 1703 } else { 1704 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 1705 } 1706 assert(false, "DEBUG MESSAGE: %s", msg); 1707 fatal("%s", msg); // returning from MacroAssembler::debug is not supported 1708 } 1709 1710 void MacroAssembler::unimplemented(const char* what) { 1711 const char* buf = NULL; 1712 { 1713 ResourceMark rm; 1714 stringStream ss; 1715 ss.print("unimplemented: %s", what); 1716 buf = code_string(ss.as_string()); 1717 } 1718 stop(buf); 1719 } 1720 1721 1722 // Implementation of FixedSizeCodeBlock 1723 1724 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) : 1725 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) { 1726 } 1727 1728 FixedSizeCodeBlock::~FixedSizeCodeBlock() { 1729 if (_enabled) { 1730 address curr_pc = _masm->pc(); 1731 1732 assert(_start < curr_pc, "invalid current pc"); 1733 guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long"); 1734 1735 int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs; 1736 for (int i = 0; i < nops_count; i++) { 1737 _masm->nop(); 1738 } 1739 } 1740 } 1741 1742 #ifdef AARCH64 1743 1744 // Serializes memory. 1745 // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM 1746 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { 1747 if (!os::is_MP()) return; 1748 1749 // TODO-AARCH64 investigate dsb vs dmb effects 1750 if (order_constraint == StoreStore) { 1751 dmb(DMB_st); 1752 } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { 1753 dmb(DMB_ld); 1754 } else { 1755 dmb(DMB_all); 1756 } 1757 } 1758 1759 #else 1760 1761 // Serializes memory. Potentially blows flags and reg. 1762 // tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions) 1763 // preserve_flags takes a longer path in LoadStore case (dmb rather then control dependency) to preserve status flags. Optional. 1764 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional. 
1765 void MacroAssembler::membar(Membar_mask_bits order_constraint, 1766 Register tmp, 1767 bool preserve_flags, 1768 Register load_tgt) { 1769 if (!os::is_MP()) return; 1770 1771 if (order_constraint == StoreStore) { 1772 dmb(DMB_st, tmp); 1773 } else if ((order_constraint & StoreLoad) || 1774 (order_constraint & LoadLoad) || 1775 (order_constraint & StoreStore) || 1776 (load_tgt == noreg) || 1777 preserve_flags) { 1778 dmb(DMB_all, tmp); 1779 } else { 1780 // LoadStore: speculative store reordering is prohibited 1781 1782 // By providing an ordered load target register, we avoid an extra memory load reference 1783 Label not_taken; 1784 bind(not_taken); 1785 cmp(load_tgt, load_tgt); 1786 b(not_taken, ne); 1787 } 1788 } 1789 1790 #endif // AARCH64 1791 1792 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" 1793 // on failure, so fall-through can only mean success. 1794 // "one_shot" controls whether we loop and retry to mitigate spurious failures. 1795 // This is only needed for C2, which for some reason does not retry, 1796 // while C1/interpreter does. 1797 // TODO: measure if it makes a difference 1798 1799 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, 1800 Register base, Register tmp, Label &slow_case, 1801 bool allow_fallthrough_on_failure, bool one_shot) 1802 { 1803 1804 bool fallthrough_is_success = false; 1805 1806 // ARM Litmus Test example does prefetching here. 1807 // TODO: investigate if it helps performance 1808 1809 // The last store was to the displaced header, so to prevent 1810 // reordering we must issue a StoreStore or Release barrier before 1811 // the CAS store. 1812 1813 #ifdef AARCH64 1814 1815 Register Rscratch = tmp; 1816 Register Roop = base; 1817 Register mark = oldval; 1818 Register Rbox = newval; 1819 Label loop; 1820 1821 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1822 1823 // Instead of StoreStore here, we use store-release-exclusive below 1824 1825 bind(loop); 1826 1827 ldaxr(tmp, base); // acquire 1828 cmp(tmp, oldval); 1829 b(slow_case, ne); 1830 stlxr(tmp, newval, base); // release 1831 if (one_shot) { 1832 cmp_w(tmp, 0); 1833 } else { 1834 cbnz_w(tmp, loop); 1835 fallthrough_is_success = true; 1836 } 1837 1838 // MemBarAcquireLock would normally go here, but 1839 // we already do ldaxr+stlxr above, which has 1840 // Sequential Consistency 1841 1842 #else 1843 membar(MacroAssembler::StoreStore, noreg); 1844 1845 if (one_shot) { 1846 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1847 cmp(tmp, oldval); 1848 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1849 cmp(tmp, 0, eq); 1850 } else { 1851 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1852 } 1853 1854 // MemBarAcquireLock barrier 1855 // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, 1856 // but that doesn't prevent a load or store from floating up between 1857 // the load and store in the CAS sequence, so play it safe and 1858 // do a full fence.
1859 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); 1860 #endif 1861 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1862 b(slow_case, ne); 1863 } 1864 } 1865 1866 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, 1867 Register base, Register tmp, Label &slow_case, 1868 bool allow_fallthrough_on_failure, bool one_shot) 1869 { 1870 1871 bool fallthrough_is_success = false; 1872 1873 assert_different_registers(oldval, newval, base, tmp); 1874 1875 #ifdef AARCH64 1876 Label loop; 1877 1878 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1879 1880 bind(loop); 1881 ldxr(tmp, base); 1882 cmp(tmp, oldval); 1883 b(slow_case, ne); 1884 // MemBarReleaseLock barrier 1885 stlxr(tmp, newval, base); 1886 if (one_shot) { 1887 cmp_w(tmp, 0); 1888 } else { 1889 cbnz_w(tmp, loop); 1890 fallthrough_is_success = true; 1891 } 1892 #else 1893 // MemBarReleaseLock barrier 1894 // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, 1895 // but that doesn't prevent a load or store from floating down between 1896 // the load and store in the CAS sequence, so play it safe and 1897 // do a full fence. 1898 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp); 1899 1900 if (one_shot) { 1901 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1902 cmp(tmp, oldval); 1903 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1904 cmp(tmp, 0, eq); 1905 } else { 1906 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1907 } 1908 #endif 1909 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1910 b(slow_case, ne); 1911 } 1912 1913 // ExitEnter 1914 // According to JSR-133 Cookbook, this should be StoreLoad, the same 1915 // barrier that follows a volatile store. 1916 // TODO: Should be able to remove on armv8 if volatile loads 1917 // use the load-acquire instruction. 1918 membar(StoreLoad, noreg); 1919 } 1920 1921 #ifndef PRODUCT 1922 1923 // Preserves flags and all registers. 1924 // On SMP the updated value might not be visible to external observers without a synchronization barrier. 1925 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) { 1926 if (counter_addr != NULL) { 1927 InlinedAddress counter_addr_literal((address)counter_addr); 1928 Label done, retry; 1929 if (cond != al) { 1930 b(done, inverse(cond)); 1931 } 1932 1933 #ifdef AARCH64 1934 raw_push(R0, R1); 1935 raw_push(R2, ZR); 1936 1937 ldr_literal(R0, counter_addr_literal); 1938 1939 bind(retry); 1940 ldxr_w(R1, R0); 1941 add_w(R1, R1, 1); 1942 stxr_w(R2, R1, R0); 1943 cbnz_w(R2, retry); 1944 1945 raw_pop(R2, ZR); 1946 raw_pop(R0, R1); 1947 #else 1948 push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1949 ldr_literal(R0, counter_addr_literal); 1950 1951 mrs(CPSR, Rtemp); 1952 1953 bind(retry); 1954 ldr_s32(R1, Address(R0)); 1955 add(R2, R1, 1); 1956 atomic_cas_bool(R1, R2, R0, 0, R3); 1957 b(retry, ne); 1958 1959 msr(CPSR_fsxc, Rtemp); 1960 1961 pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1962 #endif // AARCH64 1963 1964 b(done); 1965 bind_literal(counter_addr_literal); 1966 1967 bind(done); 1968 } 1969 } 1970 1971 #endif // !PRODUCT 1972 1973 1974 // Building block for CAS cases of biased locking: makes CAS and records statistics. 1975 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
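// On the fall-through (success) path the flags are expected to read 'eq'; the ASSERT-only
// breakpoint(ne) below relies on that.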
1976 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg, 1977 Register tmp, Label& slow_case, int* counter_addr) { 1978 1979 cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case); 1980 #ifdef ASSERT 1981 breakpoint(ne); // Fallthrough only on success 1982 #endif 1983 #ifndef PRODUCT 1984 if (counter_addr != NULL) { 1985 cond_atomic_inc32(al, counter_addr); 1986 } 1987 #endif // !PRODUCT 1988 } 1989 1990 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg, 1991 bool swap_reg_contains_mark, 1992 Register tmp2, 1993 Label& done, Label& slow_case, 1994 BiasedLockingCounters* counters) { 1995 // obj_reg must be preserved (at least) if the biased locking fails 1996 // tmp_reg is a temporary register 1997 // swap_reg was used as a temporary but contained a value 1998 // that was used afterwards in some call paths. Callers 1999 // have been fixed so that swap_reg no longer needs to be 2000 // saved. 2001 // Rtemp is no longer scratched 2002 2003 assert(UseBiasedLocking, "why call this otherwise?"); 2004 assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2); 2005 guarantee(swap_reg != tmp_reg, "invariant"); 2006 assert(tmp_reg != noreg, "must supply tmp_reg"); 2007 2008 #ifndef PRODUCT 2009 if (PrintBiasedLockingStatistics && (counters == NULL)) { 2010 counters = BiasedLocking::counters(); 2011 } 2012 #endif 2013 2014 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 2015 Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes()); 2016 2017 // Biased locking 2018 // See whether the lock is currently biased toward our thread and 2019 // whether the epoch is still valid 2020 // Note that the runtime guarantees sufficient alignment of JavaThread 2021 // pointers to allow age to be placed into low bits 2022 // First check to see whether biasing is even enabled for this object 2023 Label cas_label; 2024 2025 // The null check applies to the mark loading, if we need to load it. 2026 // If the mark has already been loaded into swap_reg, the null check has already 2027 // been performed and the offset is irrelevant. 2028 int null_check_offset = offset(); 2029 if (!swap_reg_contains_mark) { 2030 ldr(swap_reg, mark_addr); 2031 } 2032 2033 // On MP platforms loads could return 'stale' values in some cases. 2034 // That is acceptable since either CAS or slow case path is taken in the worst case. 2035 2036 andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2037 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2038 2039 b(cas_label, ne); 2040 2041 // The bias pattern is present in the object's header. Need to check 2042 // whether the bias owner and the epoch are both still current.
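// The next few instructions compute ((prototype_header | Rthread) ^ mark) and then clear the
// age bits: a zero result means the object is already biased toward this thread in the current epoch.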
2043 load_klass(tmp_reg, obj_reg); 2044 ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 2045 orr(tmp_reg, tmp_reg, Rthread); 2046 eor(tmp_reg, tmp_reg, swap_reg); 2047 2048 #ifdef AARCH64 2049 ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); 2050 #else 2051 bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); 2052 #endif // AARCH64 2053 2054 #ifndef PRODUCT 2055 if (counters != NULL) { 2056 cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); 2057 } 2058 #endif // !PRODUCT 2059 2060 b(done, eq); 2061 2062 Label try_revoke_bias; 2063 Label try_rebias; 2064 2065 // At this point we know that the header has the bias pattern and 2066 // that we are not the bias owner in the current epoch. We need to 2067 // figure out more details about the state of the header in order to 2068 // know what operations can be legally performed on the object's 2069 // header. 2070 2071 // If the low three bits in the xor result aren't clear, that means 2072 // the prototype header is no longer biased and we have to revoke 2073 // the bias on this object. 2074 tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2075 b(try_revoke_bias, ne); 2076 2077 // Biasing is still enabled for this data type. See whether the 2078 // epoch of the current bias is still valid, meaning that the epoch 2079 // bits of the mark word are equal to the epoch bits of the 2080 // prototype header. (Note that the prototype header's epoch bits 2081 // only change at a safepoint.) If not, attempt to rebias the object 2082 // toward the current thread. Note that we must be absolutely sure 2083 // that the current epoch is invalid in order to do this because 2084 // otherwise the manipulations it performs on the mark word are 2085 // illegal. 2086 tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); 2087 b(try_rebias, ne); 2088 2089 // tmp_reg has the age, epoch and pattern bits cleared 2090 // The remaining (owner) bits are (Thread ^ current_owner) 2091 2092 // The epoch of the current bias is still valid but we know nothing 2093 // about the owner; it might be set or it might be clear. Try to 2094 // acquire the bias of the object using an atomic operation. If this 2095 // fails we will go in to the runtime to revoke the object's bias. 2096 // Note that we first construct the presumed unbiased header so we 2097 // don't accidentally blow away another thread's valid bias. 2098 2099 // Note that we know the owner is not ourself. Hence, success can 2100 // only happen when the owner bits is 0 2101 2102 #ifdef AARCH64 2103 // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has 2104 // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
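// (The 32-bit ARM path below achieves the same masking with an lsl/lsr pair by 23, keeping only
// the low 9 bits, i.e. the 0x1ff mask asserted there.)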
2105 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2106 andr(swap_reg, swap_reg, tmp2); 2107 #else 2108 // until the assembler can be made smarter, we need to make some assumptions about the values 2109 // so we can optimize this: 2110 assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); 2111 2112 mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); 2113 mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) 2114 #endif // AARCH64 2115 2116 orr(tmp_reg, swap_reg, Rthread); // new mark 2117 2118 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2119 (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); 2120 2121 // If the biasing toward our thread failed, this means that 2122 // another thread succeeded in biasing it toward itself and we 2123 // need to revoke that bias. The revocation will occur in the 2124 // interpreter runtime in the slow case. 2125 2126 b(done); 2127 2128 bind(try_rebias); 2129 2130 // At this point we know the epoch has expired, meaning that the 2131 // current "bias owner", if any, is actually invalid. Under these 2132 // circumstances _only_, we are allowed to use the current header's 2133 // value as the comparison value when doing the cas to acquire the 2134 // bias in the current epoch. In other words, we allow transfer of 2135 // the bias from one thread to another directly in this situation. 2136 2137 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2138 2139 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2140 2141 // owner bits 'random'. Set them to Rthread. 2142 #ifdef AARCH64 2143 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2144 andr(tmp_reg, tmp_reg, tmp2); 2145 #else 2146 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2147 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2148 #endif // AARCH64 2149 2150 orr(tmp_reg, tmp_reg, Rthread); // new mark 2151 2152 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2153 (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); 2154 2155 // If the biasing toward our thread failed, then another thread 2156 // succeeded in biasing it toward itself and we need to revoke that 2157 // bias. The revocation will occur in the runtime in the slow case. 2158 2159 b(done); 2160 2161 bind(try_revoke_bias); 2162 2163 // The prototype mark in the klass doesn't have the bias bit set any 2164 // more, indicating that objects of this data type are not supposed 2165 // to be biased any more. We are going to try to reset the mark of 2166 // this object to the prototype value and fall through to the 2167 // CAS-based locking scheme. Note that if our CAS fails, it means 2168 // that another thread raced us for the privilege of revoking the 2169 // bias of this particular object, so it's okay to continue in the 2170 // normal locking code. 2171 2172 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2173 2174 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2175 2176 // owner bits 'random'. 
Clear them 2177 #ifdef AARCH64 2178 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2179 andr(tmp_reg, tmp_reg, tmp2); 2180 #else 2181 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2182 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2183 #endif // AARCH64 2184 2185 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, 2186 (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); 2187 2188 // Fall through to the normal CAS-based lock, because no matter what 2189 // the result of the above CAS, some thread must have succeeded in 2190 // removing the bias bit from the object's header. 2191 2192 bind(cas_label); 2193 2194 return null_check_offset; 2195 } 2196 2197 2198 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { 2199 assert(UseBiasedLocking, "why call this otherwise?"); 2200 2201 // Check for biased locking unlock case, which is a no-op 2202 // Note: we do not have to check the thread ID for two reasons. 2203 // First, the interpreter checks for IllegalMonitorStateException at 2204 // a higher level. Second, if the bias was revoked while we held the 2205 // lock, the object could not be rebiased toward another thread, so 2206 // the bias bit would be clear. 2207 ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2208 2209 andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2210 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2211 b(done, eq); 2212 } 2213 2214 #ifdef AARCH64 2215 2216 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2217 switch (size_in_bytes) { 2218 case 8: ldr(dst, src); break; 2219 case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; 2220 case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; 2221 case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; 2222 default: ShouldNotReachHere(); 2223 } 2224 } 2225 2226 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 2227 switch (size_in_bytes) { 2228 case 8: str(src, dst); break; 2229 case 4: str_32(src, dst); break; 2230 case 2: strh(src, dst); break; 2231 case 1: strb(src, dst); break; 2232 default: ShouldNotReachHere(); 2233 } 2234 } 2235 2236 #else 2237 2238 void MacroAssembler::load_sized_value(Register dst, Address src, 2239 size_t size_in_bytes, bool is_signed, AsmCondition cond) { 2240 switch (size_in_bytes) { 2241 case 4: ldr(dst, src, cond); break; 2242 case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; 2243 case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; 2244 default: ShouldNotReachHere(); 2245 } 2246 } 2247 2248 2249 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { 2250 switch (size_in_bytes) { 2251 case 4: str(src, dst, cond); break; 2252 case 2: strh(src, dst, cond); break; 2253 case 1: strb(src, dst, cond); break; 2254 default: ShouldNotReachHere(); 2255 } 2256 } 2257 #endif // AARCH64 2258 2259 // Look up the method for a megamorphic invokeinterface call. 2260 // The target method is determined by <Rinterf, Rindex>. 2261 // The receiver klass is in Rklass. 2262 // On success, the result will be in method_result, and execution falls through. 2263 // On failure, execution transfers to the given label. 
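// The lookup scans the itableOffsetEntry records that follow the vtable until it finds Rinterf
// (a null entry means the receiver class does not implement the interface), then indexes the
// corresponding itableMethodEntry array with Rindex.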
2264 void MacroAssembler::lookup_interface_method(Register Rklass, 2265 Register Rinterf, 2266 Register Rindex, 2267 Register method_result, 2268 Register temp_reg1, 2269 Register temp_reg2, 2270 Label& L_no_such_interface) { 2271 2272 assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex); 2273 2274 Register Ritable = temp_reg1; 2275 2276 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 2277 const int base = in_bytes(Klass::vtable_start_offset()); 2278 const int scale = exact_log2(vtableEntry::size_in_bytes()); 2279 ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable 2280 add(Ritable, Rklass, base); 2281 add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale)); 2282 2283 Label entry, search; 2284 2285 b(entry); 2286 2287 bind(search); 2288 add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize); 2289 2290 bind(entry); 2291 2292 // Check that the entry is non-null. A null entry means that the receiver 2293 // class doesn't implement the interface, and wasn't the same as the 2294 // receiver class checked when the interface was resolved. 2295 2296 ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes())); 2297 cbz(temp_reg2, L_no_such_interface); 2298 2299 cmp(Rinterf, temp_reg2); 2300 b(search, ne); 2301 2302 ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes())); 2303 add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass* 2304 assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); 2305 assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); 2306 2307 ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex)); 2308 } 2309 2310 #ifdef COMPILER2 2311 // TODO: 8 bytes at a time? pre-fetch? 2312 // Compare char[] arrays aligned to 4 bytes. 2313 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, 2314 Register limit, Register result, 2315 Register chr1, Register chr2, Label& Ldone) { 2316 Label Lvector, Lloop; 2317 2318 // Note: limit contains number of bytes (2*char_elements) != 0. 2319 tst(limit, 0x2); // trailing character ? 2320 b(Lvector, eq); 2321 2322 // compare the trailing char 2323 sub(limit, limit, sizeof(jchar)); 2324 ldrh(chr1, Address(ary1, limit)); 2325 ldrh(chr2, Address(ary2, limit)); 2326 cmp(chr1, chr2); 2327 mov(result, 0, ne); // not equal 2328 b(Ldone, ne); 2329 2330 // only one char ? 
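// If limit is now zero, the trailing char compared above was the entire contents; report equal.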
2331 tst(limit, limit); 2332 mov(result, 1, eq); 2333 b(Ldone, eq); 2334 2335 // word by word compare, don't need alignment check 2336 bind(Lvector); 2337 2338 // Shift ary1 and ary2 to the end of the arrays, negate limit 2339 add(ary1, limit, ary1); 2340 add(ary2, limit, ary2); 2341 neg(limit, limit); 2342 2343 bind(Lloop); 2344 ldr_u32(chr1, Address(ary1, limit)); 2345 ldr_u32(chr2, Address(ary2, limit)); 2346 cmp_32(chr1, chr2); 2347 mov(result, 0, ne); // not equal 2348 b(Ldone, ne); 2349 adds(limit, limit, 2*sizeof(jchar)); 2350 b(Lloop, ne); 2351 2352 // Caller should set it: 2353 // mov(result_reg, 1); // equal 2354 } 2355 #endif 2356 2357 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { 2358 mov_slow(tmpreg1, counter_addr); 2359 ldr_s32(tmpreg2, tmpreg1); 2360 add_32(tmpreg2, tmpreg2, 1); 2361 str_32(tmpreg2, tmpreg1); 2362 } 2363 2364 void MacroAssembler::floating_cmp(Register dst) { 2365 #ifdef AARCH64 2366 NOT_TESTED(); 2367 cset(dst, gt); // 1 if '>', else 0 2368 csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 2369 #else 2370 vmrs(dst, FPSCR); 2371 orr(dst, dst, 0x08000000); 2372 eor(dst, dst, AsmOperand(dst, lsl, 3)); 2373 mov(dst, AsmOperand(dst, asr, 30)); 2374 #endif 2375 } 2376 2377 void MacroAssembler::restore_default_fp_mode() { 2378 #ifdef AARCH64 2379 msr(SysReg_FPCR, ZR); 2380 #else 2381 #ifndef __SOFTFP__ 2382 // Round to Near mode, IEEE compatible, masked exceptions 2383 mov(Rtemp, 0); 2384 vmsr(FPSCR, Rtemp); 2385 #endif // !__SOFTFP__ 2386 #endif // AARCH64 2387 } 2388 2389 #ifndef AARCH64 2390 // 24-bit word range == 26-bit byte range 2391 bool check26(int offset) { 2392 // this could be simplified, but it mimics encoding and decoding 2393 // an actual branch instruction 2394 int off1 = offset << 6 >> 8; 2395 int encoded = off1 & ((1<<24)-1); 2396 int decoded = encoded << 8 >> 6; 2397 return offset == decoded; 2398 } 2399 #endif // !AARCH64 2400 2401 // Perform some slight adjustments so the default 32MB code cache 2402 // is fully reachable. 2403 static inline address first_cache_address() { 2404 return CodeCache::low_bound() + sizeof(HeapBlock::Header); 2405 } 2406 static inline address last_cache_address() { 2407 return CodeCache::high_bound() - Assembler::InstructionSize; 2408 } 2409 2410 #ifdef AARCH64 2411 // Can we reach target using ADRP? 2412 bool MacroAssembler::page_reachable_from_cache(address target) { 2413 intptr_t cl = (intptr_t)first_cache_address() & ~0xfff; 2414 intptr_t ch = (intptr_t)last_cache_address() & ~0xfff; 2415 intptr_t addr = (intptr_t)target & ~0xfff; 2416 2417 intptr_t loffset = addr - cl; 2418 intptr_t hoffset = addr - ch; 2419 return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0); 2420 } 2421 #endif 2422 2423 // Can we reach target using unconditional branch or call from anywhere 2424 // in the code cache (because code can be relocated)? 2425 bool MacroAssembler::_reachable_from_cache(address target) { 2426 #ifdef __thumb__ 2427 if ((1 & (intptr_t)target) != 0) { 2428 // Return false to avoid 'b' if we need to switch to THUMB mode. 2429 return false; 2430 } 2431 #endif 2432 2433 address cl = first_cache_address(); 2434 address ch = last_cache_address(); 2435 2436 if (ForceUnreachable) { 2437 // Only addresses from CodeCache can be treated as reachable.
2438 if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) { 2439 return false; 2440 } 2441 } 2442 2443 intptr_t loffset = (intptr_t)target - (intptr_t)cl; 2444 intptr_t hoffset = (intptr_t)target - (intptr_t)ch; 2445 2446 #ifdef AARCH64 2447 return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); 2448 #else 2449 return check26(loffset - 8) && check26(hoffset - 8); 2450 #endif 2451 } 2452 2453 bool MacroAssembler::reachable_from_cache(address target) { 2454 assert(CodeCache::contains(pc()), "not supported"); 2455 return _reachable_from_cache(target); 2456 } 2457 2458 // Can we reach the entire code cache from anywhere else in the code cache? 2459 bool MacroAssembler::_cache_fully_reachable() { 2460 address cl = first_cache_address(); 2461 address ch = last_cache_address(); 2462 return _reachable_from_cache(cl) && _reachable_from_cache(ch); 2463 } 2464 2465 bool MacroAssembler::cache_fully_reachable() { 2466 assert(CodeCache::contains(pc()), "not supported"); 2467 return _cache_fully_reachable(); 2468 } 2469 2470 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2471 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2472 if (reachable_from_cache(target)) { 2473 relocate(rtype); 2474 b(target NOT_AARCH64_ARG(cond)); 2475 return; 2476 } 2477 2478 // Note: relocate is not needed for the code below, 2479 // encoding targets in absolute format. 2480 if (ignore_non_patchable_relocations()) { 2481 rtype = relocInfo::none; 2482 } 2483 2484 #ifdef AARCH64 2485 assert (scratch != noreg, "should be specified"); 2486 InlinedAddress address_literal(target, rtype); 2487 ldr_literal(scratch, address_literal); 2488 br(scratch); 2489 int off = offset(); 2490 bind_literal(address_literal); 2491 #ifdef COMPILER2 2492 if (offset() - off == wordSize) { 2493 // no padding, so insert nop for worst-case sizing 2494 nop(); 2495 } 2496 #endif 2497 #else 2498 if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { 2499 // Note: this version cannot be (atomically) patched 2500 mov_slow(scratch, (intptr_t)target, cond); 2501 bx(scratch, cond); 2502 } else { 2503 Label skip; 2504 InlinedAddress address_literal(target); 2505 if (cond != al) { 2506 b(skip, inverse(cond)); 2507 } 2508 relocate(rtype); 2509 ldr_literal(PC, address_literal); 2510 bind_literal(address_literal); 2511 bind(skip); 2512 } 2513 #endif // AARCH64 2514 } 2515 2516 // Similar to jump except that: 2517 // - near calls are valid only if any destination in the cache is near 2518 // - no movt/movw (not atomically patchable) 2519 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2520 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2521 if (cache_fully_reachable()) { 2522 // Note: this assumes that all possible targets (the initial one 2523 // and the addressed patched to) are all in the code cache. 2524 assert(CodeCache::contains(target), "target might be too far"); 2525 relocate(rtype); 2526 b(target NOT_AARCH64_ARG(cond)); 2527 return; 2528 } 2529 2530 // Discard the relocation information if not needed for CacheCompiledCode 2531 // since the next encodings are all in absolute format. 
2532 if (ignore_non_patchable_relocations()) { 2533 rtype = relocInfo::none; 2534 } 2535 2536 #ifdef AARCH64 2537 assert (scratch != noreg, "should be specified"); 2538 InlinedAddress address_literal(target); 2539 relocate(rtype); 2540 ldr_literal(scratch, address_literal); 2541 br(scratch); 2542 int off = offset(); 2543 bind_literal(address_literal); 2544 #ifdef COMPILER2 2545 if (offset() - off == wordSize) { 2546 // no padding, so insert nop for worst-case sizing 2547 nop(); 2548 } 2549 #endif 2550 #else 2551 { 2552 Label skip; 2553 InlinedAddress address_literal(target); 2554 if (cond != al) { 2555 b(skip, inverse(cond)); 2556 } 2557 relocate(rtype); 2558 ldr_literal(PC, address_literal); 2559 bind_literal(address_literal); 2560 bind(skip); 2561 } 2562 #endif // AARCH64 2563 } 2564 2565 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { 2566 Register scratch = LR; 2567 assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); 2568 if (reachable_from_cache(target)) { 2569 relocate(rspec); 2570 bl(target NOT_AARCH64_ARG(cond)); 2571 return; 2572 } 2573 2574 // Note: relocate is not needed for the code below, 2575 // encoding targets in absolute format. 2576 if (ignore_non_patchable_relocations()) { 2577 // This assumes the information was needed only for relocating the code. 2578 rspec = RelocationHolder::none; 2579 } 2580 2581 #ifndef AARCH64 2582 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { 2583 // Note: this version cannot be (atomically) patched 2584 mov_slow(scratch, (intptr_t)target, cond); 2585 blx(scratch, cond); 2586 return; 2587 } 2588 #endif 2589 2590 { 2591 Label ret_addr; 2592 #ifndef AARCH64 2593 if (cond != al) { 2594 b(ret_addr, inverse(cond)); 2595 } 2596 #endif 2597 2598 2599 #ifdef AARCH64 2600 // TODO-AARCH64: make more optimal implementation 2601 // [ Keep in sync with MacroAssembler::call_size ] 2602 assert(rspec.type() == relocInfo::none, "call reloc not implemented"); 2603 mov_slow(scratch, target); 2604 blr(scratch); 2605 #else 2606 InlinedAddress address_literal(target); 2607 relocate(rspec); 2608 adr(LR, ret_addr); 2609 ldr_literal(PC, address_literal); 2610 2611 bind_literal(address_literal); 2612 bind(ret_addr); 2613 #endif 2614 } 2615 } 2616 2617 #if defined(AARCH64) && defined(COMPILER2) 2618 int MacroAssembler::call_size(address target, bool far, bool patchable) { 2619 // FIXME: mov_slow is variable-length 2620 if (!far) return 1; // bl 2621 if (patchable) return 2; // ldr; blr 2622 return instr_count_for_mov_slow((intptr_t)target) + 1; 2623 } 2624 #endif 2625 2626 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { 2627 assert(rspec.type() == relocInfo::static_call_type || 2628 rspec.type() == relocInfo::none || 2629 rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); 2630 2631 // Always generate the relocation information, needed for patching 2632 relocate(rspec); // used by NativeCall::is_call_before() 2633 if (cache_fully_reachable()) { 2634 // Note: this assumes that all possible targets (the initial one 2635 // and the addresses patched to) are all in the code cache. 2636 assert(CodeCache::contains(target), "target might be too far"); 2637 bl(target); 2638 } else { 2639 #if defined(AARCH64) && defined(COMPILER2) 2640 if (c2) { 2641 // return address needs to match call_size(). 
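// (call_size() reports 2 instructions for this patchable far call: the literal load plus blr.)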
2642 // no need to trash Rtemp 2643 int off = offset(); 2644 Label skip_literal; 2645 InlinedAddress address_literal(target); 2646 ldr_literal(LR, address_literal); 2647 blr(LR); 2648 int ret_addr_offset = offset(); 2649 assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); 2650 b(skip_literal); 2651 int off2 = offset(); 2652 bind_literal(address_literal); 2653 if (offset() - off2 == wordSize) { 2654 // no padding, so insert nop for worst-case sizing 2655 nop(); 2656 } 2657 bind(skip_literal); 2658 return ret_addr_offset; 2659 } 2660 #endif 2661 Label ret_addr; 2662 InlinedAddress address_literal(target); 2663 #ifdef AARCH64 2664 ldr_literal(Rtemp, address_literal); 2665 adr(LR, ret_addr); 2666 br(Rtemp); 2667 #else 2668 adr(LR, ret_addr); 2669 ldr_literal(PC, address_literal); 2670 #endif 2671 bind_literal(address_literal); 2672 bind(ret_addr); 2673 } 2674 return offset(); 2675 } 2676 2677 2678 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { 2679 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2680 ldr(tmp, Address(method, Method::const_offset())); 2681 ldr(tmp, Address(tmp, ConstMethod::constants_offset())); 2682 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); 2683 ldr(mirror, Address(tmp, mirror_offset)); 2684 } 2685 2686 /////////////////////////////////////////////////////////////////////////////// 2687 2688 // Compressed pointers 2689 2690 #ifdef AARCH64 2691 2692 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { 2693 if (UseCompressedClassPointers) { 2694 ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2695 decode_klass_not_null(dst_klass); 2696 } else { 2697 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2698 } 2699 } 2700 2701 #else 2702 2703 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { 2704 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); 2705 } 2706 2707 #endif // AARCH64 2708 2709 // Blows src_klass. 2710 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { 2711 #ifdef AARCH64 2712 if (UseCompressedClassPointers) { 2713 assert(src_klass != dst_oop, "not enough registers"); 2714 encode_klass_not_null(src_klass); 2715 str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2716 return; 2717 } 2718 #endif // AARCH64 2719 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2720 } 2721 2722 #ifdef AARCH64 2723 2724 void MacroAssembler::store_klass_gap(Register dst) { 2725 if (UseCompressedClassPointers) { 2726 str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2727 } 2728 } 2729 2730 #endif // AARCH64 2731 2732 2733 void MacroAssembler::load_heap_oop(Register dst, Address src) { 2734 #ifdef AARCH64 2735 if (UseCompressedOops) { 2736 ldr_w(dst, src); 2737 decode_heap_oop(dst); 2738 return; 2739 } 2740 #endif // AARCH64 2741 ldr(dst, src); 2742 } 2743 2744 // Blows src and flags. 
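// (With compressed oops, encode_heap_oop narrows the value in src in place before the store,
// which is why both src and the flags must be treated as clobbered.)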
2745 void MacroAssembler::store_heap_oop(Register src, Address dst) { 2746 #ifdef AARCH64 2747 if (UseCompressedOops) { 2748 assert(!dst.uses(src), "not enough registers"); 2749 encode_heap_oop(src); 2750 str_w(src, dst); 2751 return; 2752 } 2753 #endif // AARCH64 2754 str(src, dst); 2755 } 2756 2757 void MacroAssembler::store_heap_oop_null(Register src, Address dst) { 2758 #ifdef AARCH64 2759 if (UseCompressedOops) { 2760 str_w(src, dst); 2761 return; 2762 } 2763 #endif // AARCH64 2764 str(src, dst); 2765 } 2766 2767 2768 #ifdef AARCH64 2769 2770 // Algorithm must match oop.inline.hpp encode_heap_oop. 2771 void MacroAssembler::encode_heap_oop(Register dst, Register src) { 2772 // This code pattern is matched in NativeIntruction::skip_encode_heap_oop. 2773 // Update it at modifications. 2774 assert (UseCompressedOops, "must be compressed"); 2775 assert (Universe::heap() != NULL, "java heap should be initialized"); 2776 #ifdef ASSERT 2777 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2778 #endif 2779 verify_oop(src); 2780 if (Universe::narrow_oop_base() == NULL) { 2781 if (Universe::narrow_oop_shift() != 0) { 2782 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2783 _lsr(dst, src, Universe::narrow_oop_shift()); 2784 } else if (dst != src) { 2785 mov(dst, src); 2786 } 2787 } else { 2788 tst(src, src); 2789 csel(dst, Rheap_base, src, eq); 2790 sub(dst, dst, Rheap_base); 2791 if (Universe::narrow_oop_shift() != 0) { 2792 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2793 _lsr(dst, dst, Universe::narrow_oop_shift()); 2794 } 2795 } 2796 } 2797 2798 // Same algorithm as oop.inline.hpp decode_heap_oop. 2799 void MacroAssembler::decode_heap_oop(Register dst, Register src) { 2800 #ifdef ASSERT 2801 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2802 #endif 2803 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2804 if (Universe::narrow_oop_base() != NULL) { 2805 tst(src, src); 2806 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2807 csel(dst, dst, ZR, ne); 2808 } else { 2809 _lsl(dst, src, Universe::narrow_oop_shift()); 2810 } 2811 verify_oop(dst); 2812 } 2813 2814 #ifdef COMPILER2 2815 // Algorithm must match oop.inline.hpp encode_heap_oop. 2816 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule 2817 // must be changed. 2818 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2819 assert (UseCompressedOops, "must be compressed"); 2820 assert (Universe::heap() != NULL, "java heap should be initialized"); 2821 #ifdef ASSERT 2822 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2823 #endif 2824 verify_oop(src); 2825 if (Universe::narrow_oop_base() == NULL) { 2826 if (Universe::narrow_oop_shift() != 0) { 2827 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2828 _lsr(dst, src, Universe::narrow_oop_shift()); 2829 } else if (dst != src) { 2830 mov(dst, src); 2831 } 2832 } else { 2833 sub(dst, src, Rheap_base); 2834 if (Universe::narrow_oop_shift() != 0) { 2835 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2836 _lsr(dst, dst, Universe::narrow_oop_shift()); 2837 } 2838 } 2839 } 2840 2841 // Same algorithm as oops.inline.hpp decode_heap_oop. 
2842 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule 2843 // must be changed. 2844 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2845 #ifdef ASSERT 2846 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2847 #endif 2848 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2849 if (Universe::narrow_oop_base() != NULL) { 2850 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2851 } else { 2852 _lsl(dst, src, Universe::narrow_oop_shift()); 2853 } 2854 verify_oop(dst); 2855 } 2856 2857 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 2858 assert(UseCompressedClassPointers, "should only be used for compressed header"); 2859 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2860 int klass_index = oop_recorder()->find_index(k); 2861 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 2862 2863 // Relocation with special format (see relocInfo_arm.hpp). 2864 relocate(rspec); 2865 narrowKlass encoded_k = Klass::encode_klass(k); 2866 movz(dst, encoded_k & 0xffff, 0); 2867 movk(dst, (encoded_k >> 16) & 0xffff, 16); 2868 } 2869 2870 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 2871 assert(UseCompressedOops, "should only be used for compressed header"); 2872 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 2873 int oop_index = oop_recorder()->find_index(obj); 2874 RelocationHolder rspec = oop_Relocation::spec(oop_index); 2875 2876 relocate(rspec); 2877 movz(dst, 0xffff, 0); 2878 movk(dst, 0xffff, 16); 2879 } 2880 2881 #endif // COMPILER2 2882 2883 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2884 // must be changed. 2885 void MacroAssembler::encode_klass_not_null(Register r) { 2886 if (Universe::narrow_klass_base() != NULL) { 2887 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2888 assert(r != Rheap_base, "Encoding a klass in Rheap_base"); 2889 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2890 sub(r, r, Rheap_base); 2891 } 2892 if (Universe::narrow_klass_shift() != 0) { 2893 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2894 _lsr(r, r, Universe::narrow_klass_shift()); 2895 } 2896 if (Universe::narrow_klass_base() != NULL) { 2897 reinit_heapbase(); 2898 } 2899 } 2900 2901 // Must preserve condition codes, or C2 encodeKlass_not_null rule 2902 // must be changed. 2903 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 2904 if (dst == src) { 2905 encode_klass_not_null(src); 2906 return; 2907 } 2908 if (Universe::narrow_klass_base() != NULL) { 2909 mov_slow(dst, (int64_t)Universe::narrow_klass_base()); 2910 sub(dst, src, dst); 2911 if (Universe::narrow_klass_shift() != 0) { 2912 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2913 _lsr(dst, dst, Universe::narrow_klass_shift()); 2914 } 2915 } else { 2916 if (Universe::narrow_klass_shift() != 0) { 2917 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2918 _lsr(dst, src, Universe::narrow_klass_shift()); 2919 } else { 2920 mov(dst, src); 2921 } 2922 } 2923 } 2924 2925 // Function instr_count_for_decode_klass_not_null() counts the instructions 2926 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 2927 // when (Universe::heap() != NULL). 
Hence, if the instructions they 2928 // generate change, then this method needs to be updated. 2929 int MacroAssembler::instr_count_for_decode_klass_not_null() { 2930 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 2931 assert(Universe::heap() != NULL, "java heap should be initialized"); 2932 if (Universe::narrow_klass_base() != NULL) { 2933 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 2934 1 + // add 2935 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 2936 } else { 2937 if (Universe::narrow_klass_shift() != 0) { 2938 return 1; 2939 } 2940 } 2941 return 0; 2942 } 2943 2944 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2945 // must be changed. 2946 void MacroAssembler::decode_klass_not_null(Register r) { 2947 int off = offset(); 2948 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2949 assert(Universe::heap() != NULL, "java heap should be initialized"); 2950 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 2951 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 2952 // Also do not verify_oop as this is called by verify_oop. 2953 if (Universe::narrow_klass_base() != NULL) { 2954 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 2955 mov_slow(Rheap_base, Universe::narrow_klass_base()); 2956 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); 2957 reinit_heapbase(); 2958 } else { 2959 if (Universe::narrow_klass_shift() != 0) { 2960 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 2961 _lsl(r, r, Universe::narrow_klass_shift()); 2962 } 2963 } 2964 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); 2965 } 2966 2967 // Must preserve condition codes, or C2 decodeKlass_not_null rule 2968 // must be changed. 2969 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 2970 if (src == dst) { 2971 decode_klass_not_null(src); 2972 return; 2973 } 2974 2975 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 2976 assert(Universe::heap() != NULL, "java heap should be initialized"); 2977 assert(src != Rheap_base, "Decoding a klass in Rheap_base"); 2978 assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); 2979 // Also do not verify_oop as this is called by verify_oop. 2980 if (Universe::narrow_klass_base() != NULL) { 2981 mov_slow(dst, Universe::narrow_klass_base()); 2982 add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); 2983 } else { 2984 _lsl(dst, src, Universe::narrow_klass_shift()); 2985 } 2986 } 2987 2988 2989 void MacroAssembler::reinit_heapbase() { 2990 if (UseCompressedOops || UseCompressedClassPointers) { 2991 if (Universe::heap() != NULL) { 2992 mov_slow(Rheap_base, Universe::narrow_ptrs_base()); 2993 } else { 2994 ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); 2995 } 2996 } 2997 } 2998 2999 #ifdef ASSERT 3000 void MacroAssembler::verify_heapbase(const char* msg) { 3001 // This code pattern is matched in NativeIntruction::skip_verify_heapbase. 3002 // Update it at modifications. 
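// The sequence below saves and restores the NZCV flags and Rtemp around the check so that
// register state is preserved for the surrounding generated code.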
3003 assert (UseCompressedOops, "should be compressed"); 3004 assert (Universe::heap() != NULL, "java heap should be initialized"); 3005 if (CheckCompressedOops) { 3006 Label ok; 3007 str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 3008 raw_push(Rtemp, ZR); 3009 mrs(Rtemp, Assembler::SysReg_NZCV); 3010 str(Rtemp, Address(SP, 1 * wordSize)); 3011 mov_slow(Rtemp, Universe::narrow_ptrs_base()); 3012 cmp(Rheap_base, Rtemp); 3013 b(ok, eq); 3014 stop(msg); 3015 bind(ok); 3016 ldr(Rtemp, Address(SP, 1 * wordSize)); 3017 msr(Assembler::SysReg_NZCV, Rtemp); 3018 raw_pop(Rtemp, ZR); 3019 str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 3020 } 3021 } 3022 #endif // ASSERT 3023 3024 #endif // AARCH64 3025 3026 #ifdef COMPILER2 3027 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3028 { 3029 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3030 3031 Register Rmark = Rscratch2; 3032 3033 assert(Roop != Rscratch, ""); 3034 assert(Roop != Rmark, ""); 3035 assert(Rbox != Rscratch, ""); 3036 assert(Rbox != Rmark, ""); 3037 3038 Label fast_lock, done; 3039 3040 if (UseBiasedLocking && !UseOptoBiasInlining) { 3041 Label failed; 3042 #ifdef AARCH64 3043 biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); 3044 #else 3045 biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); 3046 #endif 3047 bind(failed); 3048 } 3049 3050 ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); 3051 tst(Rmark, markOopDesc::unlocked_value); 3052 b(fast_lock, ne); 3053 3054 // Check for recursive lock 3055 // See comments in InterpreterMacroAssembler::lock_object for 3056 // explanations on the fast recursive locking check. 
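// Roughly: if the mark word, seen as an address, has its low two bits clear and lies within one
// page above SP, it is a displaced header already on this thread's stack, i.e. a recursive lock,
// and zero is stored as the displaced header.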
3057 #ifdef AARCH64 3058 intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); 3059 Assembler::LogicalImmediate imm(mask, false); 3060 mov(Rscratch, SP); 3061 sub(Rscratch, Rmark, Rscratch); 3062 ands(Rscratch, Rscratch, imm); 3063 b(done, ne); // exit with failure 3064 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero 3065 b(done); 3066 3067 #else 3068 // -1- test low 2 bits 3069 movs(Rscratch, AsmOperand(Rmark, lsl, 30)); 3070 // -2- test (hdr - SP) if the low two bits are 0 3071 sub(Rscratch, Rmark, SP, eq); 3072 movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); 3073 // If still 'eq' then recursive locking OK 3074 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero 3075 b(done); 3076 #endif 3077 3078 bind(fast_lock); 3079 str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3080 3081 bool allow_fallthrough_on_failure = true; 3082 bool one_shot = true; 3083 cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3084 3085 bind(done); 3086 3087 } 3088 3089 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3090 { 3091 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3092 3093 Register Rmark = Rscratch2; 3094 3095 assert(Roop != Rscratch, ""); 3096 assert(Roop != Rmark, ""); 3097 assert(Rbox != Rscratch, ""); 3098 assert(Rbox != Rmark, ""); 3099 3100 Label done; 3101 3102 if (UseBiasedLocking && !UseOptoBiasInlining) { 3103 biased_locking_exit(Roop, Rscratch, done); 3104 } 3105 3106 ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3107 // If hdr is NULL, we've got recursive locking and there's nothing more to do 3108 cmp(Rmark, 0); 3109 b(done, eq); 3110 3111 // Restore the object header 3112 bool allow_fallthrough_on_failure = true; 3113 bool one_shot = true; 3114 cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3115 3116 bind(done); 3117 3118 } 3119 #endif // COMPILER2 3120