/*
 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
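
// Illustrative sketch (not from this file; the exact constructor form is an
// assumption): an AddressLiteral pairs a target address with the relocation
// spec chosen above, so code that materializes a VM global stays patchable:
//
//   AddressLiteral heap_top((address)Universe::heap()->top_addr(),
//                           relocInfo::external_word_type);
//
// With external_word_type, set_rspec routes the target through
// external_word_Relocation::spec(_target) as shown above.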

// Initially added to the Assembler interface as a pure virtual:
//   RegisterConstant delayed_value(..)
// for:
//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
// this was subsequently modified to its present name and return type
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  ShouldNotReachHere();
  return RegisterOrConstant(-1);
}


#ifdef AARCH64
// Note: ARM32 version is OS dependent
void MacroAssembler::breakpoint(AsmCondition cond) {
  if (cond == al) {
    brk();
  } else {
    Label L;
    b(L, inverse(cond));
    brk();
    bind(L);
  }
}
#endif // AARCH64


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr);
  cmp(super_klass, temp_reg); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
#ifdef AARCH64
  NOT_IMPLEMENTED();
#else
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key is the compressed super_klass
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_setnz_and_fail, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes, elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}
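
// Illustrative sketch (assumed call site, not from this file): the usual
// protocol is to run the fast path first and let it branch to a slow-path
// stub for the secondary-supers scan. Per the label-null convention above,
// passing NULL for exactly one label makes that outcome fall through:
//
//   Label L_ok, L_slow;
//   __ check_klass_subtype_fast_path(Rsub, Rsuper, Rtmp1, Rtmp2,
//                                    &L_ok, NULL /* failure falls through */, &L_slow);
//   // ... failure handling ...
//   __ bind(L_slow);
//   __ check_klass_subtype_slow_path(Rsub, Rsuper, Rtmp1, Rtmp2, Rtmp3,
//                                    &L_ok, NULL /* failure falls through */);
//   // ... failure handling ...
//   __ bind(L_ok);
//
// Register names here are placeholders.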

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to comment in javaFrameAnchor, SP must be saved last, so that other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since for instance PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
#ifdef AARCH64
  if (last_java_sp == SP) {
    mov(tmp, SP);
    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  } else {
    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
  }
#else
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

#ifndef AARCH64
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // AARCH64
  call(entry_point, relocInfo::runtime_call_type);
#ifndef AARCH64
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
#endif // AARCH64
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#ifdef ASSERT
  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
#endif // ASSERT

#ifndef AARCH64
#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9).
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED
#endif

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#ifndef AARCH64
#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
#endif

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
#ifdef AARCH64
    Label L;
    cbz(tmp, L);
    mov_pc_to(Rexception_pc);
    b(StubRoutines::forward_exception_entry());
    bind(L);
#else
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
#endif // AARCH64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}
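
// Illustrative sketch (assumed call site, not from this file): the asserts
// above pin VM-call arguments to fixed registers, so a caller moves values
// into R1..R3 before the call; R0 is reserved for the current thread, which
// call_VM_helper loads itself:
//
//   __ mov(R1, Robj);   // arg_1 must already be in R1
//   __ call_VM(R0, CAST_FROM_FN_PTR(address, some_vm_entry), R1);
//
// 'some_vm_entry' and 'Robj' are placeholders for a real runtime entry
// point and register.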

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c == 0) {
    if (rd != rn) {
      mov(rd, rn);
    }
    return;
  }
  if (c < 0) {
    sub_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large add_slow with only one register");
    mov_slow(rd, c);
    add(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      add(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in the compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in the compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}
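
// Worked example (illustrative): on 32-bit ARM, add_slow(rd, rn, 0x1234)
// splits the constant as 0x1234 = 0x234 + 0x1000; 0x234 fits the low 0x3fc
// mask and 0x1000 is a valid rotated immediate, so it emits:
//
//   add(rd, rn, 0x234);
//   add(rd, rd, 0x1000);
//
// On AArch64 the same idea uses two 12-bit halves (add #lo, then
// add #hi, lsl #12); constants above 24 bits go through mov_slow.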

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns number of instructions of mov_slow pattern,
// generating it if non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.

  const intx mask = right_n_bits(16);
  // 1 movz instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if ((c & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 movn instruction
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    if (((~c) & ~(mask << base_shift)) == 0) {
      if (masm != NULL) {
        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
      }
      return 1;
    }
  }
  // 1 orr instruction
  {
    LogicalImmediate imm(c, false);
    if (imm.is_encoded()) {
      if (masm != NULL) {
        masm->orr(rd, ZR, imm);
      }
      return 1;
    }
  }
  // 1 movz/movn + up to 3 movk instructions
  int zeroes = 0;
  int ones = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part == 0) {
      ++zeroes;
    } else if (part == mask) {
      ++ones;
    }
  }
  int def_bits = 0;
  if (ones > zeroes) {
    def_bits = mask;
  }
  int inst_count = 0;
  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
    int part = (c >> base_shift) & mask;
    if (part != def_bits) {
      if (masm != NULL) {
        if (inst_count > 0) {
          masm->movk(rd, part, base_shift);
        } else {
          if (def_bits == 0) {
            masm->movz(rd, part, base_shift);
          } else {
            masm->movn(rd, ~part & mask, base_shift);
          }
        }
      }
      inst_count++;
    }
  }
  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
  return inst_count;
}

void MacroAssembler::mov_slow(Register rd, intptr_t c) {
#ifdef ASSERT
  int off = offset();
#endif
  (void) mov_slow_helper(rd, c, this);
  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
}
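
// Worked example (illustrative): for c = 0x0000123400000000 only the
// half-word at bit 32 is non-zero, so the first loop emits a single
//   movz(rd, 0x1234, 32);
// For c = -2 (all ones except bit 0), ~c = 1 and the movn loop emits
//   movn(rd, 1, 0);
// A mixed constant such as 0x1234000056780000 falls through to the last
// phase (zero is the majority half-word, so def_bits == 0) and becomes
//   movz(rd, 0x5678, 16);  movk(rd, 0x1234, 48);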

// Counts instructions generated by mov_slow(rd, c).
int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
  return mov_slow_helper(noreg, c, NULL);
}

int MacroAssembler::instr_count_for_mov_slow(address c) {
  return mov_slow_helper(noreg, (intptr_t)c, NULL);
}

#else

void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}

#endif // AARCH64
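
// Worked example (illustrative): without movw/movt support,
// mov_slow(rd, 0x12345678) finds the lowest set bit pair (shift = 2,
// mask = 0x3fc) and rebuilds the constant from four byte-wide,
// rotation-encodable chunks:
//
//   mov(rd, 0x278);            // c & mask
//   orr(rd, rd, 0x5400);       // c & (mask << 8)
//   orr(rd, rd, 0x2340000);    // c & (mask << 16)
//   orr(rd, rd, 0x10000000);   // c & (mask << 24)
//
// 0x278 + 0x5400 + 0x2340000 + 0x10000000 == 0x12345678.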

void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
#ifdef AARCH64
                             bool patchable
#else
                             AsmCondition cond
#endif
                             ) {

  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
    mov(rd, ZR);
#else
    mov(rd, 0, cond);
#endif
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
  if (o == NULL) {
#ifdef AARCH64
    if (patchable) {
      nop();
    }
#endif
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

#ifdef AARCH64
  if (patchable) {
    nop();
  }
#ifdef COMPILER2
  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
    mov_slow(rd, (address)o);
    return;
  }
#endif
  ldr(rd, pc());
#else
  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
#endif // AARCH64
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jfloat f;
    jint i;
  } accessor;
  accessor.f = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
  Label L;
  ldr_s(fd, target(L));
  b(skip_constant);
  bind(L);
  emit_int32(accessor.i);
  bind(skip_constant);
#else
  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

#ifdef AARCH64
  // TODO-AARCH64 - try to optimize loading of double constants with fmov
  Label L;
  ldr_d(fd, target(L));
  b(skip_constant);
  align(wordSize);
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false, "TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert ((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  ldr_global_s32(reg, address_of_global);
#endif
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: uxtb(rd, rn);  break;
    case 16: uxth(rd, rn);  break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case  8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}
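
// Worked example (illustrative): on 32-bit ARM, zero_extend(rd, rn, 16)
// cannot use a single mask (0xffff is not a rotated immediate), so it
// shifts the value up and back down:
//
//   mov(rd, AsmOperand(rn, lsl, 16));   // discard the high 16 bits
//   mov(rd, AsmOperand(rd, lsr, 16));   // zero-fill from the top
//
// sign_extend uses the same pattern with asr, so the sign bit is replicated.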

#ifndef AARCH64

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64
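
// Worked example (illustrative): the immediate-count variant with
// shift == lsl and count == 40 (>= 32) reduces a 64-bit shift to
//
//   mov(rd_hi, AsmOperand(rn_lo, lsl, 8));   // low word becomes high word
//   mov(rd_lo, 0);
//
// while count == 8 (< 32) stitches the two words together:
//
//   mov(rd_hi, AsmOperand(rn_hi, lsl, 8));
//   orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 24));
//   mov(rd_lo, AsmOperand(rn_lo, lsl, 8));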

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);  // oop to verify
  }
  mov(R1, SP);     // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);  // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);  // oop to verify
  mov(R1, SP);    // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);  // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}
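
// Illustrative sketch (assumed call site, not from this file): allocation
// helpers are typically tried in order, with the runtime as the final
// fallback; register names and control flow below are placeholders:
//
//   Label slow_case, done;
//   __ tlab_allocate(Robj, Robj_end, Rtemp, size_in_bytes, slow_case);
//   __ b(done);
//   __ bind(slow_case);
//   // call into the runtime to allocate (e.g. via call_VM)
//   __ bind(done);
//
// tlab_allocate only bumps the thread-local top, so it needs no atomics;
// eden_allocate races with other threads and therefore retries its CAS.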

// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (that is an unlikely
  // scenario on typical embedded systems as it means >4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
  // registers. Such a request could be difficult to satisfy by allocating those
  // registers on a higher level, therefore the routine is ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3

  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));

  pop(RegisterSet(low, high));

  bind(done);
#endif // AARCH64
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}
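
// Note on the 0xff0 stride above (illustrative reasoning, not from this
// file): on 32-bit ARM the bang step must fit a pre-indexed immediate and
// be strictly smaller than the page size; stepping by 0xff0 (just under a
// 0x1000-byte page) keeps consecutive probes less than a page apart, so
// every page of the new frame is touched at least once before SP moves.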

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg);  // message
  mov(R1, SP);            // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug);  // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);   // message
  ldr_literal(LR, Lwarn);  // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}

#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative stores reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}

#endif // AARCH64
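
// Note on the LoadStore special case above (illustrative reasoning): the
// never-taken conditional branch that compares load_tgt against itself
// creates a control dependency between the preceding load and any later
// store. Since the architecture does not let speculative stores become
// visible, this orders load->store without a dmb; it is cheaper, but it
// clobbers the flags, which is why preserve_flags forces the dmb path.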

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base);  // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT
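
// Illustrative sketch (assumed call site, not from this file): a typical
// lock fast path stores the displaced mark word into an on-stack BasicLock
// box first, then publishes the box pointer with the CAS above; register
// names here are placeholders:
//
//   __ str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
//   __ cas_for_lock_acquire(Rmark, Rbox, Robj, Rtemp, slow_case);
//   // fall through: lock acquired
//
// The StoreStore barrier inside cas_for_lock_acquire is what keeps the
// displaced-header store ordered before the CAS store.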
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if the bias locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg != tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then it has already
  // been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On MP platform loads could return 'stale' values in some cases.
  // That is acceptable since either CAS or slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne);

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
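  // The sequence below computes, in tmp_reg,
  //   ((prototype_header | Rthread) ^ mark) & ~age_mask
  // so a zero result ('eq') means the bias owner is the current thread and
  // the epoch is current.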
  load_klass(tmp_reg, obj_reg);
  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  orr(tmp_reg, tmp_reg, Rthread);
  eor(tmp_reg, tmp_reg, swap_reg);

#ifdef AARCH64
  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
#else
  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
#endif // AARCH64

#ifndef PRODUCT
  if (counters != NULL) {
    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
  }
#endif // !PRODUCT

  b(done, eq);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  b(try_revoke_bias, ne);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
  b(try_rebias, ne);

  // tmp_reg has the age, epoch and pattern bits cleared
  // The remaining (owner) bits are (Thread ^ current_owner)

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  // Note that we know the owner is not ourself. Hence, success can
  // only happen when the owner bits are 0

#ifdef AARCH64
  // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate,
  // as it has a cleared bit in the middle (the cms bit). So it is loaded with a
  // separate instruction.
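  // (An AArch64 logical immediate must be a single, possibly rotated and
  //  replicated, run of contiguous set bits; a mask with a hole in the
  //  middle is not encodable.)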
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(swap_reg, swap_reg, tmp2);
#else
  // until the assembler can be made smarter, we need to make some assumptions about the values
  // so we can optimize this:
  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");

  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
#endif // AARCH64

  orr(tmp_reg, swap_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
                                (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  b(done);

  bind(try_rebias);

  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Set them to Rthread.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  orr(tmp_reg, tmp_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
                                (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.

  b(done);

  bind(try_revoke_bias);

  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Clear them.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
                                (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.

  bind(cas_label);

  return null_check_offset;
}


void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  b(done, eq);
}


void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, not_weak;
  cbz(value, done);        // Use NULL as-is.
  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
  tbz(value, 0, not_weak); // Test for jweak tag.
  // Resolve jweak.
  ldr(value, Address(value, -JNIHandles::weak_tag_value));
  verify_oop(value);
#if INCLUDE_ALL_GCS
  if (UseG1GC) {
    g1_write_barrier_pre(noreg, // store_addr
                         noreg, // new_val
                         value, // pre_val
                         tmp1,  // tmp1
                         tmp2); // tmp2
  }
#endif // INCLUDE_ALL_GCS
  b(done);
  bind(not_weak);
  // Resolve (untagged) jobject.
  ldr(value, Address(value));
  verify_oop(value);
  bind(done);
}


//////////////////////////////////////////////////////////////////////////////////

#if INCLUDE_ALL_GCS

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
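// Background: this is the SATB (snapshot-at-the-beginning) barrier. Logging
// the value about to be overwritten lets concurrent marking see objects that
// were reachable when marking started, even if the last reference to them is
// overwritten before they are marked.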
void MacroAssembler::g1_write_barrier_pre(Register store_addr,
                                          Register new_val,
                                          Register pre_val,
                                          Register tmp1,
                                          Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert (new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  ldrb(tmp1, in_progress);
  cbz(tmp1, done);

  // Do we need to load the previous value?
  if (store_addr != noreg) {
    load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ldr(tmp1, index);           // tmp1 := *index_adr
  ldr(tmp2, buffer);

  subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  b(runtime, lt);             // If negative, goto runtime

  str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  str(pre_val, Address(tmp2, tmp1));
  b(done);

  bind(runtime);

  // save the live input values
#ifdef AARCH64
  if (store_addr != noreg) {
    raw_push(store_addr, new_val);
  } else {
    raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    mov(R0, pre_val);
  }
  mov(R1, Rthread);

  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    raw_pop(store_addr, new_val);
  } else {
    raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    pop(pre_val);
  }
#endif // AARCH64

  bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register tmp1,
                                           Register tmp2,
                                           Register tmp3) {

  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));

  BarrierSet* bs = BarrierSet::barrier_set();
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
  CardTable* ct = ctbs->card_table();
  Label done;
  Label runtime;

  // Does store cross heap regions?

  eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  cbz(tmp1, done);
#else
  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  b(done, eq);
#endif

  // crosses regions, storing NULL?
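  // (The eor/shift sequence above is the region-crossing test: store_addr and
  //  new_val are in the same heap region iff they agree in all address bits
  //  above LogOfHRGrainBytes, i.e. iff the shifted xor is zero.)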
  cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");

  mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));

  ldrb(tmp2, Address(card_addr));
  cmp(tmp2, (int)G1CardTable::g1_young_card_val());
  b(done, eq);

  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTable::dirty_card_val() == 0, "adjust this code");
  ldrb(tmp2, Address(card_addr));
  cbz(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  strb(zero_register(tmp2), Address(card_addr));

  ldr(tmp2, queue_index);
  ldr(tmp3, buffer);

  subs(tmp2, tmp2, wordSize);
  b(runtime, lt); // go to runtime if now negative

  str(tmp2, queue_index);

  str(card_addr, Address(tmp3, tmp2));
  b(done);

  bind(runtime);

  if (card_addr != R0) {
    mov(R0, card_addr);
  }
  mov(R1, Rthread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  bind(done);
}

#endif // INCLUDE_ALL_GCS

//////////////////////////////////////////////////////////////////////////////////

#ifdef AARCH64

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
  case 8: ldr(dst, src); break;
  case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
  case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
  case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
  default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
  case 8: str(src, dst); break;
  case 4: str_32(src, dst); break;
  case 2: strh(src, dst); break;
  case 1: strb(src, dst); break;
  default: ShouldNotReachHere();
  }
}

#else

void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
  case 4: ldr(dst, src, cond); break;
  case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
  case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
  default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
  case 4: str(src, dst, cond); break;
  case 2: strh(src, dst, cond); break;
  case 1: strb(src, dst, cond); break;
  default: ShouldNotReachHere();
  }
}
#endif // AARCH64

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rintf, itable_index>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
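// (Layout assumed here: the itable begins with a row of itableOffsetEntry
//  records, each pairing an interface Klass* with the offset of that
//  interface's block of itableMethodEntry records within the same klass;
//  the scan below walks the offset rows, then indexes into the method block.)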
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rintf,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register Rscan,
                                             Register Rtmp,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rintf, Rscan, Rtmp);

  const int entry_size = itableOffsetEntry::size() * HeapWordSize;
  assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Rscan, Rklass, base);
  add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));

  // Search through the itable for an interface equal to incoming Rintf
  // itable looks like [intface][offset][intface][offset][intface][offset]

  Label loop;
  bind(loop);
  ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
#ifdef AARCH64
  Label found;
  cmp(Rtmp, Rintf);
  b(found, eq);
  cbnz(Rtmp, loop);
#else
  cmp(Rtmp, Rintf); // set ZF and CF if interface is found
  cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is
  b(loop, ne);
#endif // AARCH64

#ifdef AARCH64
  b(L_no_such_interface);
  bind(found);
#else
  // CF == 0 means we reached the end of itable without finding icklass
  b(L_no_such_interface, cc);
#endif // !AARCH64

  if (method_result != noreg) {
    // Interface found at previous position of Rscan, now load the method
    ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size));
    if (itable_index.is_register()) {
      add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
      assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
      assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
      ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
    } else {
      int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
                          itableMethodEntry::method_offset_in_bytes();
      add_slow(method_result, Rklass, method_offset);
      ldr(method_result, Address(method_result, Rtmp));
    }
  }
}

#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne); // not equal
  b(Ldone, ne);

  // only one char?
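  // (limit was already decremented by sizeof(jchar) above, so a zero value
  //  here means the trailing char was the only element.)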
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne); // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1); //equal
}
#endif

void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);           // 1 if '>', else 0
  csinv(dst, dst, ZR, ge); // previous value if '>=', else -1
#else
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}

void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif

// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we need switching to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

#ifdef AARCH64
  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
#else
  return check26(loffset - 8) && check26(hoffset - 8);
#endif
}

bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target, rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
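  // (In the far-jump fallback below, the target is read from an inline
  //  literal, e.g. on 32-bit ARM:
  //    ldr pc, [pc, #imm]   @ load the target address from the literal pool
  //    .word <target>
  //  so patching the literal word is what retargets the jump.)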
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target);
  relocate(rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    // This assumes the information was needed only for relocating the code.
    rspec = RelocationHolder::none;
  }

#ifndef AARCH64
  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    blx(scratch, cond);
    return;
  }
#endif

  {
    Label ret_addr;
#ifndef AARCH64
    if (cond != al) {
      b(ret_addr, inverse(cond));
    }
#endif

#ifdef AARCH64
    // TODO-AARCH64: make more optimal implementation
    // [ Keep in sync with MacroAssembler::call_size ]
    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
    mov_slow(scratch, target);
    blr(scratch);
#else
    InlinedAddress address_literal(target);
    relocate(rspec);
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);

    bind_literal(address_literal);
    bind(ret_addr);
#endif
  }
}

#if defined(AARCH64) && defined(COMPILER2)
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  // FIXME: mov_slow is variable-length
  if (!far) return 1;      // bl
  if (patchable) return 2; // ldr; blr
  return instr_count_for_mov_slow((intptr_t)target) + 1;
}
#endif

int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
  assert(rspec.type() == relocInfo::static_call_type ||
         rspec.type() == relocInfo::none ||
         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");

  // Always generate the relocation information, needed for patching
  relocate(rspec); // used by NativeCall::is_call_before()
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    bl(target);
  } else {
#if defined(AARCH64) && defined(COMPILER2)
    if (c2) {
      // return address needs to match call_size().
      // no need to trash Rtemp
      int off = offset();
      Label skip_literal;
      InlinedAddress address_literal(target);
      ldr_literal(LR, address_literal);
      blr(LR);
      int ret_addr_offset = offset();
      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
      b(skip_literal);
      int off2 = offset();
      bind_literal(address_literal);
      if (offset() - off2 == wordSize) {
        // no padding, so insert nop for worst-case sizing
        nop();
      }
      bind(skip_literal);
      return ret_addr_offset;
    }
#endif
    Label ret_addr;
    InlinedAddress address_literal(target);
#ifdef AARCH64
    ldr_literal(Rtemp, address_literal);
    adr(LR, ret_addr);
    br(Rtemp);
#else
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);
#endif
    bind_literal(address_literal);
    bind(ret_addr);
  }
  return offset();
}

// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  ldr(result, Address(result, 0));
}

void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(tmp, Address(method, Method::const_offset()));
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
  resolve_oop_handle(mirror);
}


///////////////////////////////////////////////////////////////////////////////

// Compressed pointers

#ifdef AARCH64

void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst_klass);
  } else {
    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
  }
}

#else

void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

#endif // AARCH64

// Blows src_klass.
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
#ifdef AARCH64
  if (UseCompressedClassPointers) {
    assert(src_klass != dst_oop, "not enough registers");
    encode_klass_not_null(src_klass);
    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
    return;
  }
#endif // AARCH64
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}

#ifdef AARCH64

void MacroAssembler::store_klass_gap(Register dst) {
  if (UseCompressedClassPointers) {
    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

#endif // AARCH64


void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef AARCH64
  if (UseCompressedOops) {
    ldr_w(dst, src);
    decode_heap_oop(dst);
    return;
  }
#endif // AARCH64
  ldr(dst, src);
}

// Blows src and flags.
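// (With compressed oops enabled, the value stored is the 32-bit n such that
//  oop == narrow_oop_base + (n << narrow_oop_shift), with n == 0 reserved
//  for NULL; see encode_heap_oop/decode_heap_oop below.)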
void MacroAssembler::store_heap_oop(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}

void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}


#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    sub(dst, src, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);

  // Relocation with special format (see relocInfo_arm.hpp).
  relocate(rspec);
  narrowKlass encoded_k = Klass::encode_klass(k);
  movz(dst, encoded_k & 0xffff, 0);
  movk(dst, (encoded_k >> 16) & 0xffff, 16);
}

void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec);
  movz(dst, 0xffff, 0);
  movk(dst, 0xffff, 16);
}

#endif // COMPILER2

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
    return;
  }
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).
// Hence, if the instructions they generate change, then this method needs
// to be updated.
int MacroAssembler::instr_count_for_decode_klass_not_null() {
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  if (Universe::narrow_klass_base() != NULL) {
    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
           1 +                                                       // add
           instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      return 1;
    }
  }
  return 0;
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register r) {
  int off = offset();
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
    reinit_heapbase();
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsl(r, r, Universe::narrow_klass_shift());
    }
  }
  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}


void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it at modifications.
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    Label failed;
#ifdef AARCH64
    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
#else
    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
#endif
    bind(failed);
  }

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
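  // (Sketch of the intent: the mark of a stack-locked object points into the
  //  owning thread's stack, so recursion is detected by checking that
  //  0 <= mark - SP < page_size with the low lock bits clear, and is flagged
  //  by storing a NULL displaced header into the BasicLock.)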
#ifdef AARCH64
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  b(done, ne); // exit with failure
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}

void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}
#endif // COMPILER2