/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// this was subsequently modified to its present name and return type
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr); // load displayed supertype
  cmp(super_klass, temp_reg);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key is the uncompressed super_klass
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to comment in javaFrameAnchor, SP must be saved last, so that other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.
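  // (If such a barrier turns out to be needed, it would take the same
  // membar(MacroAssembler::StoreStore, noreg) form used in
  // cas_for_lock_acquire below; this is a note, not generated code.)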

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect for
  // instance the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9).
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
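// Illustrative (hypothetical) stub-side use, where some_vm_entry stands in
// for an actual VM entry point taking only the implicit thread argument:
//   __ call_VM(CAST_FROM_FN_PTR(address, some_vm_entry), true /* save R9 */);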
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns number of instructions of mov_slow pattern,
// generating it if non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.
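  // Illustrative example (comment only): c = 0x0000123400005678 has two
  // non-zero 16-bit parts, so the last case below emits movz+movk
  // (2 instructions); a constant dominated by all-ones 16-bit parts would
  // instead be built from movn+movk.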

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}

// Counts instructions generated by mov_slow(rd, c).
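// Must stay in sync with mov_slow_helper above; mov_slow asserts that the
// predicted and emitted sizes match.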
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}

#endif // AARCH64

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false, "TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: uxtb(rd, rn); break;
    case 16: uxth(rd, rn); break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
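  // Calling convention of the verify stub (set up below):
  // R0 = message, R1 = register save area, R2 = oop to verify.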
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);                              // oop to verify
  }
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);                               // oop to verify
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
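// Allocation is a lock-free bump-the-pointer scheme: load the shared heap top,
// compute obj_end, then CAS the new top in; on CAS failure the code below
// retries from the freshly reloaded top.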
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (an unlikely
  // scenario on typical embedded systems, as it means >4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair
  // of registers. Such a request could be difficult to satisfy by allocating
  // those registers on a higher level, therefore the routine is ready to
  // allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3

  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));

  pop(RegisterSet(low, high));

  bind(done);
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
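  // Sequence emitted below: save all registers, load the message into R0 and
  // the register save area into R1, then jump to MacroAssembler::debug.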
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg);                     // message
  mov(R1, SP);                               // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);                    // message
  ldr_literal(LR, Lwarn);                   // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
  _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}

#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64; this parameter is provided solely for
// better compatibility with the 32-bit ARM version.
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in the LoadStore case (dmb rather than a control
// dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in the LoadStore case only, to create a dependency
// between the load operation and the conditional branch. Optional.
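// Illustrative (hypothetical) use of the load_tgt path:
//   __ ldr(Rval, Address(Robj));                             // the ordered load
//   __ membar(MacroAssembler::LoadStore, Rtemp, false, Rval);
// The never-taken conditional branch emitted below then orders later stores
// after the load without a full dmb.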
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative stores reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}

#endif // AARCH64

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT


// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
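// Note: cond_atomic_inc32 preserves flags, so the condition codes produced by
// the CAS remain valid for the caller after the counter update.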
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if biased locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  // First check to see whether biasing is even enabled for this object.
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then the null check
  // has already been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On MP platforms loads could return 'stale' values in some cases.
  // That is acceptable since either the CAS or the slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
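  // Sketch of the biased mark word layout relied on below (see markOop.hpp):
  //   [JavaThread* | epoch | age | biased_lock(1) | lock(2)]
  // The eor below therefore leaves set bits exactly where the object's mark
  // disagrees with (prototype header | current thread).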
  load_klass(tmp_reg, obj_reg);
  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  orr(tmp_reg, tmp_reg, Rthread);
  eor(tmp_reg, tmp_reg, swap_reg);

#ifdef AARCH64
  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
#else
  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
#endif // AARCH64

#ifndef PRODUCT
  if (counters != NULL) {
    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
  }
#endif // !PRODUCT

  b(done, eq);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  b(try_revoke_bias, ne);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
  b(try_rebias, ne);

  // tmp_reg has the age, epoch and pattern bits cleared.
  // The remaining (owner) bits are (Thread ^ current_owner).

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  // Note that we know the owner is not ourselves. Hence, success can
  // only happen when the owner bits are 0.

#ifdef AARCH64
  // The bit mask biased_lock + age + epoch is not a valid AArch64 logical
  // immediate, as it has a cleared bit in the middle (the cms bit), so it
  // is loaded with a separate instruction.
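  // (AArch64 logical immediates must encode a rotated run of contiguous ones,
  //  so a mask with a hole in it cannot be an immediate operand of 'and'.)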
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(swap_reg, swap_reg, tmp2);
#else
  // Until the assembler can be made smarter, we need to make some assumptions about the values
  // so we can optimize this:
  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");

  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
#endif // AARCH64

  orr(tmp_reg, swap_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
                                (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  b(done);

  bind(try_rebias);

  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved!)

  // owner bits 'random'. Set them to Rthread.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  orr(tmp_reg, tmp_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
                                (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.

  b(done);

  bind(try_revoke_bias);

  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved!)

  // owner bits 'random'. Clear them.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
                                (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.

  bind(cas_label);

  return null_check_offset;
}


void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op.
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  b(done, eq);
}


void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, not_weak;
  cbz(value, done);        // Use NULL as-is.
  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
  tbz(value, 0, not_weak); // Test for jweak tag.
  // Resolve jweak.
  ldr(value, Address(value, -JNIHandles::weak_tag_value));
  verify_oop(value);
#if INCLUDE_ALL_GCS
  if (UseG1GC) {
    g1_write_barrier_pre(noreg, // store_addr
                         noreg, // new_val
                         value, // pre_val
                         tmp1,  // tmp1
                         tmp2); // tmp2
  }
#endif // INCLUDE_ALL_GCS
  b(done);
  bind(not_weak);
  // Resolve (untagged) jobject.
  ldr(value, Address(value));
  verify_oop(value);
  bind(done);
}


//////////////////////////////////////////////////////////////////////////////////

#if INCLUDE_ALL_GCS

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
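// Rough sketch of the generated fast path (field names illustrative only):
//   if (thread->satb_queue.active) {
//     if (store_addr != noreg) pre_val = *store_addr;
//     if (pre_val != NULL) {
//       index -= wordSize;
//       if (index >= 0) buf[index] = pre_val;       // enqueue in thread-local buffer
//       else            g1_wb_pre(pre_val, thread); // buffer full: runtime slow path
//     }
//   }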
void MacroAssembler::g1_write_barrier_pre(Register store_addr,
                                          Register new_val,
                                          Register pre_val,
                                          Register tmp1,
                                          Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert(new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        SATBMarkQueue::byte_offset_of_active()));
  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  SATBMarkQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                   SATBMarkQueue::byte_offset_of_buf()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  ldrb(tmp1, in_progress);
  cbz(tmp1, done);

  // Do we need to load the previous value?
  if (store_addr != noreg) {
    load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  cbz(pre_val, done);

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ldr(tmp1, index);           // tmp1 := *index_adr
  ldr(tmp2, buffer);

  subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  b(runtime, lt);             // If negative, goto runtime

  str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  str(pre_val, Address(tmp2, tmp1));
  b(done);

  bind(runtime);

  // save the live input values
#ifdef AARCH64
  if (store_addr != noreg) {
    raw_push(store_addr, new_val);
  } else {
    raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    mov(R0, pre_val);
  }
  mov(R1, Rthread);

  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    raw_pop(store_addr, new_val);
  } else {
    raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    pop(pre_val);
  }
#endif // AARCH64

  bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register tmp1,
                                           Register tmp2,
                                           Register tmp3) {

  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        DirtyCardQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                   DirtyCardQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ctbs = barrier_set_cast<CardTableModRefBS>(bs);
  CardTable* ct = ctbs->card_table();
  Label done;
  Label runtime;

  // Does the store cross heap regions?
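  // I.e. ((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0 exactly when both
  // addresses lie in the same region, in which case no post-barrier work is needed.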
  eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  cbz(tmp1, done);
#else
  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  b(done, eq);
#endif

  // crosses regions, storing NULL?

  cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");

  mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));

  ldrb(tmp2, Address(card_addr));
  cmp(tmp2, (int)G1CardTable::g1_young_card_val());
  b(done, eq);

  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  ldrb(tmp2, Address(card_addr));
  cbz(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  strb(zero_register(tmp2), Address(card_addr));

  ldr(tmp2, queue_index);
  ldr(tmp3, buffer);

  subs(tmp2, tmp2, wordSize);
  b(runtime, lt); // go to runtime if now negative

  str(tmp2, queue_index);

  str(card_addr, Address(tmp3, tmp2));
  b(done);

  bind(runtime);

  if (card_addr != R0) {
    mov(R0, card_addr);
  }
  mov(R1, Rthread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  bind(done);
}

#endif // INCLUDE_ALL_GCS

//////////////////////////////////////////////////////////////////////////////////

#ifdef AARCH64

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: ldr(dst, src); break;
    case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
    case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
    case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: str(src, dst); break;
    case 4: str_32(src, dst); break;
    case 2: strh(src, dst); break;
    case 1: strb(src, dst); break;
    default: ShouldNotReachHere();
  }
}

#else

void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: ldr(dst, src, cond); break;
    case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
    case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
    default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: str(src, dst, cond); break;
    case 2: strh(src, dst, cond); break;
    case 1: strb(src, dst, cond); break;
    default: ShouldNotReachHere();
  }
}
#endif // AARCH64

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rintf, itable_index>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rintf,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register Rscan,
                                             Register Rtmp,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rintf, Rscan, Rtmp);

  const int entry_size = itableOffsetEntry::size() * HeapWordSize;
  assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Rscan, Rklass, base);
  add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));

  // Search through the itable for an interface equal to incoming Rintf
  // itable looks like [intface][offset][intface][offset][intface][offset]

  Label loop;
  bind(loop);
  ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
#ifdef AARCH64
  Label found;
  cmp(Rtmp, Rintf);
  b(found, eq);
  cbnz(Rtmp, loop);
#else
  cmp(Rtmp, Rintf); // set ZF and CF if interface is found
  cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is
  b(loop, ne);
#endif // AARCH64

#ifdef AARCH64
  b(L_no_such_interface);
  bind(found);
#else
  // CF == 0 means we reached the end of the itable without finding the interface
  b(L_no_such_interface, cc);
#endif // !AARCH64

  if (method_result != noreg) {
    // Interface found at previous position of Rscan, now load the method
    ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
    if (itable_index.is_register()) {
      add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
      assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
      assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
      ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
    } else {
      int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
                          itableMethodEntry::method_offset_in_bytes();
      add_slow(method_result, Rklass, method_offset);
      ldr(method_result, Address(method_result, Rtmp));
    }
  }
}

#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne); // not equal
  b(Ldone, ne);

  // only one char?
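  // (limit was already decremented by sizeof(jchar) above, so limit == 0 here
  //  means the trailing char was the only one and the arrays compared equal)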
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne); // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1); // equal
}
#endif

void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);           // 1 if '>', else 0
  csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
#else
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}

void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif

// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we need switching to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
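    // (ForceUnreachable is a develop flag: it deliberately treats out-of-cache
    //  targets as far, so the literal-based far branch paths get exercised.)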
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

#ifdef AARCH64
  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
#else
  return check26(loffset - 8) && check26(hoffset - 8);
#endif
}

bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert(scratch != noreg, "should be specified");
  InlinedAddress address_literal(target, rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
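  // On 32-bit ARM the patchable fallback below is "ldr pc, <literal>": the
  // absolute target lives in a literal word next to the code and can be
  // patched atomically.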
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert(scratch != noreg, "should be specified");
  InlinedAddress address_literal(target);
  relocate(rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    // This assumes the information was needed only for relocating the code.
    rspec = RelocationHolder::none;
  }

#ifndef AARCH64
  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    blx(scratch, cond);
    return;
  }
#endif

  {
    Label ret_addr;
#ifndef AARCH64
    if (cond != al) {
      b(ret_addr, inverse(cond));
    }
#endif

#ifdef AARCH64
    // TODO-AARCH64: make a more optimal implementation
    // [ Keep in sync with MacroAssembler::call_size ]
    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
    mov_slow(scratch, target);
    blr(scratch);
#else
    InlinedAddress address_literal(target);
    relocate(rspec);
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);

    bind_literal(address_literal);
    bind(ret_addr);
#endif
  }
}

#if defined(AARCH64) && defined(COMPILER2)
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  // FIXME: mov_slow is variable-length
  if (!far)      return 1; // bl
  if (patchable) return 2; // ldr; blr
  return instr_count_for_mov_slow((intptr_t)target) + 1;
}
#endif

int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
  assert(rspec.type() == relocInfo::static_call_type ||
         rspec.type() == relocInfo::none ||
         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");

  // Always generate the relocation information, needed for patching
  relocate(rspec); // used by NativeCall::is_call_before()
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    bl(target);
  } else {
#if defined(AARCH64) && defined(COMPILER2)
    if (c2) {
      // return address needs to match call_size().
      // no need to trash Rtemp
      int off = offset();
      Label skip_literal;
      InlinedAddress address_literal(target);
      ldr_literal(LR, address_literal);
      blr(LR);
      int ret_addr_offset = offset();
      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
      b(skip_literal);
      int off2 = offset();
      bind_literal(address_literal);
      if (offset() - off2 == wordSize) {
        // no padding, so insert nop for worst-case sizing
        nop();
      }
      bind(skip_literal);
      return ret_addr_offset;
    }
#endif
    Label ret_addr;
    InlinedAddress address_literal(target);
#ifdef AARCH64
    ldr_literal(Rtemp, address_literal);
    adr(LR, ret_addr);
    br(Rtemp);
#else
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);
#endif
    bind_literal(address_literal);
    bind(ret_addr);
  }
  return offset();
}

// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  ldr(result, Address(result, 0));
}

void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(tmp, Address(method, Method::const_offset()));
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
  resolve_oop_handle(mirror);
}


///////////////////////////////////////////////////////////////////////////////

// Compressed pointers

#ifdef AARCH64

void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst_klass);
  } else {
    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
  }
}

#else

void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

#endif // AARCH64

// Blows src_klass.
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
#ifdef AARCH64
  if (UseCompressedClassPointers) {
    assert(src_klass != dst_oop, "not enough registers");
    encode_klass_not_null(src_klass);
    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
    return;
  }
#endif // AARCH64
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}

#ifdef AARCH64

void MacroAssembler::store_klass_gap(Register dst) {
  if (UseCompressedClassPointers) {
    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

#endif // AARCH64


void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef AARCH64
  if (UseCompressedOops) {
    ldr_w(dst, src);
    decode_heap_oop(dst);
    return;
  }
#endif // AARCH64
  ldr(dst, src);
}

// Blows src and flags.
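// ("blows src" because, with compressed oops on, encode_heap_oop below rewrites
//  src in place; the tst inside encode_heap_oop can also clobber the flags.)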
void MacroAssembler::store_heap_oop(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}

void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}


#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert(UseCompressedOops, "must be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert(UseCompressedOops, "must be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    sub(dst, src, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);

  // Relocation with special format (see relocInfo_arm.hpp).
  relocate(rspec);
  narrowKlass encoded_k = Klass::encode_klass(k);
  movz(dst, encoded_k & 0xffff, 0);
  movk(dst, (encoded_k >> 16) & 0xffff, 16);
}

void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec);
  movz(dst, 0xffff, 0);
  movk(dst, 0xffff, 16);
}

#endif // COMPILER2

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
    return;
  }
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL). Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_count_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_klass_base() != NULL) {
    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
           1 +                                                       // add
           instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      return 1;
    }
  }
  return 0;
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register r) {
  int off = offset();
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
    reinit_heapbase();
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsl(r, r, Universe::narrow_klass_shift());
    }
  }
  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}


void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it at modifications.
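  // The generated check saves and restores NZCV via Rtemp below, so callers'
  // condition flags survive this debug-only verification.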
  assert(UseCompressedOops, "should be compressed");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    Label failed;
#ifdef AARCH64
    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
#else
    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
#endif
    bind(failed);
  }

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock.
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
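  // In short: if the mark's low two bits are clear and (mark - SP) is smaller
  // than a page, the displaced header points into our own stack, i.e. this is
  // a recursive stack lock, and zero is stored as the displaced header.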
#ifdef AARCH64
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  b(done, ne); // exit with failure
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);
}

void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);
}
#endif // COMPILER2