1 /* 2 * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "asm/macroAssembler.hpp" 29 #include "ci/ciEnv.hpp" 30 #include "code/nativeInst.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/cardTable.hpp" 33 #include "gc/shared/cardTableBarrierSet.hpp" 34 #include "gc/shared/collectedHeap.inline.hpp" 35 #include "interpreter/interpreter.hpp" 36 #include "memory/resourceArea.hpp" 37 #include "oops/klass.inline.hpp" 38 #include "prims/methodHandles.hpp" 39 #include "runtime/biasedLocking.hpp" 40 #include "runtime/interfaceSupport.inline.hpp" 41 #include "runtime/objectMonitor.hpp" 42 #include "runtime/os.hpp" 43 #include "runtime/sharedRuntime.hpp" 44 #include "runtime/stubRoutines.hpp" 45 #include "utilities/macros.hpp" 46 #if INCLUDE_ALL_GCS 47 #include "gc/g1/g1BarrierSet.hpp" 48 #include "gc/g1/g1CardTable.hpp" 49 #include "gc/g1/heapRegion.hpp" 50 #endif 51 52 // Implementation of AddressLiteral 53 54 void AddressLiteral::set_rspec(relocInfo::relocType rtype) { 55 switch (rtype) { 56 case relocInfo::oop_type: 57 // Oops are a special case. Normally they would be their own section 58 // but in cases like icBuffer they are literals in the code stream that 59 // we don't have a section for. We use none so that we get a literal address 60 // which is always patchable. 61 break; 62 case relocInfo::external_word_type: 63 _rspec = external_word_Relocation::spec(_target); 64 break; 65 case relocInfo::internal_word_type: 66 _rspec = internal_word_Relocation::spec(_target); 67 break; 68 case relocInfo::opt_virtual_call_type: 69 _rspec = opt_virtual_call_Relocation::spec(); 70 break; 71 case relocInfo::static_call_type: 72 _rspec = static_call_Relocation::spec(); 73 break; 74 case relocInfo::runtime_call_type: 75 _rspec = runtime_call_Relocation::spec(); 76 break; 77 case relocInfo::poll_type: 78 case relocInfo::poll_return_type: 79 _rspec = Relocation::spec_simple(rtype); 80 break; 81 case relocInfo::none: 82 break; 83 default: 84 ShouldNotReachHere(); 85 break; 86 } 87 } 88 89 // Initially added to the Assembler interface as a pure virtual: 90 // RegisterConstant delayed_value(..) 
91 // for: 92 // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) 93 // this was subsequently modified to its present name and return type 94 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 95 Register tmp, 96 int offset) { 97 ShouldNotReachHere(); 98 return RegisterOrConstant(-1); 99 } 100 101 102 #ifdef AARCH64 103 // Note: ARM32 version is OS dependent 104 void MacroAssembler::breakpoint(AsmCondition cond) { 105 if (cond == al) { 106 brk(); 107 } else { 108 Label L; 109 b(L, inverse(cond)); 110 brk(); 111 bind(L); 112 } 113 } 114 #endif // AARCH64 115 116 117 // virtual method calling 118 void MacroAssembler::lookup_virtual_method(Register recv_klass, 119 Register vtable_index, 120 Register method_result) { 121 const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes(); 122 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 123 add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); 124 ldr(method_result, Address(recv_klass, base_offset)); 125 } 126 127 128 // Simplified, combined version, good for typical uses. 129 // Falls through on failure. 130 void MacroAssembler::check_klass_subtype(Register sub_klass, 131 Register super_klass, 132 Register temp_reg, 133 Register temp_reg2, 134 Register temp_reg3, 135 Label& L_success) { 136 Label L_failure; 137 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); 138 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); 139 bind(L_failure); 140 }; 141 142 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 143 Register super_klass, 144 Register temp_reg, 145 Register temp_reg2, 146 Label* L_success, 147 Label* L_failure, 148 Label* L_slow_path) { 149 150 assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg); 151 const Register super_check_offset = temp_reg2; 152 153 Label L_fallthrough; 154 int label_nulls = 0; 155 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 156 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 157 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 158 assert(label_nulls <= 1, "at most one NULL in the batch"); 159 160 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 161 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 162 Address super_check_offset_addr(super_klass, sco_offset); 163 164 // If the pointers are equal, we are done (e.g., String[] elements). 165 // This self-check enables sharing of secondary supertype arrays among 166 // non-primary types such as array-of-interface. Otherwise, each such 167 // type would need its own customized SSA. 168 // We move this check to the front of the fast path because many 169 // type checks are in fact trivially successful in this manner, 170 // so we get a nicely predicted branch right at the start of the check. 171 cmp(sub_klass, super_klass); 172 b(*L_success, eq); 173 174 // Check the supertype display: 175 ldr_u32(super_check_offset, super_check_offset_addr); 176 177 Address super_check_addr(sub_klass, super_check_offset); 178 ldr(temp_reg, super_check_addr); 179 cmp(super_klass, temp_reg); // load displayed supertype 180 181 // This check has worked decisively for primary supers. 182 // Secondary supers are sought in the super_cache ('super_cache_addr'). 
183 // (Secondary supers are interfaces and very deeply nested subtypes.) 184 // This works in the same check above because of a tricky aliasing 185 // between the super_cache and the primary super display elements. 186 // (The 'super_check_addr' can address either, as the case requires.) 187 // Note that the cache is updated below if it does not help us find 188 // what we need immediately. 189 // So if it was a primary super, we can just fail immediately. 190 // Otherwise, it's the slow path for us (no success at this point). 191 192 b(*L_success, eq); 193 cmp_32(super_check_offset, sc_offset); 194 if (L_failure == &L_fallthrough) { 195 b(*L_slow_path, eq); 196 } else { 197 b(*L_failure, ne); 198 if (L_slow_path != &L_fallthrough) { 199 b(*L_slow_path); 200 } 201 } 202 203 bind(L_fallthrough); 204 } 205 206 207 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 208 Register super_klass, 209 Register temp_reg, 210 Register temp2_reg, 211 Register temp3_reg, 212 Label* L_success, 213 Label* L_failure, 214 bool set_cond_codes) { 215 #ifdef AARCH64 216 NOT_IMPLEMENTED(); 217 #else 218 // Note: if used by code that expects a register to be 0 on success, 219 // this register must be temp_reg and set_cond_codes must be true 220 221 Register saved_reg = noreg; 222 223 // get additional tmp registers 224 if (temp3_reg == noreg) { 225 saved_reg = temp3_reg = LR; 226 push(saved_reg); 227 } 228 229 assert(temp2_reg != noreg, "need all the temporary registers"); 230 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg); 231 232 Register cmp_temp = temp_reg; 233 Register scan_temp = temp3_reg; 234 Register count_temp = temp2_reg; 235 236 Label L_fallthrough; 237 int label_nulls = 0; 238 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 239 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 240 assert(label_nulls <= 1, "at most one NULL in the batch"); 241 242 // a couple of useful fields in sub_klass: 243 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 244 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 245 Address secondary_supers_addr(sub_klass, ss_offset); 246 Address super_cache_addr( sub_klass, sc_offset); 247 248 #ifndef PRODUCT 249 inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp); 250 #endif 251 252 // We will consult the secondary-super array. 253 ldr(scan_temp, Address(sub_klass, ss_offset)); 254 255 assert(! UseCompressedOops, "search_key must be the compressed super_klass"); 256 // else search_key is the 257 Register search_key = super_klass; 258 259 // Load the array length. 260 ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes())); 261 add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); 262 263 add(count_temp, count_temp, 1); 264 265 Label L_loop, L_setnz_and_fail, L_fail; 266 267 // Top of search loop 268 bind(L_loop); 269 // Notes: 270 // scan_temp starts at the array elements 271 // count_temp is 1+size 272 subs(count_temp, count_temp, 1); 273 if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) { 274 // direct jump to L_failure if failed and no cleanup needed 275 b(*L_failure, eq); // not found and 276 } else { 277 b(L_fail, eq); // not found in the array 278 } 279 280 // Load next super to check 281 // In the array of super classes elements are pointer sized. 
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success. Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
#endif
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}


void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) {
    nop();
  }
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
#ifdef AARCH64
    pc_offset = mov_pc_to(tmp);
    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
#else
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
#endif
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that
  // the other entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.
370 371 if (last_java_sp == noreg) { 372 last_java_sp = SP; // always saved 373 } 374 #ifdef AARCH64 375 if (last_java_sp == SP) { 376 mov(tmp, SP); 377 str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); 378 } else { 379 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 380 } 381 #else 382 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 383 #endif 384 385 return pc_offset; // for oopmaps 386 } 387 388 void MacroAssembler::reset_last_Java_frame(Register tmp) { 389 const Register Rzero = zero_register(tmp); 390 str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); 391 if (_fp_saved) { 392 str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset())); 393 } 394 if (_pc_saved) { 395 str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset())); 396 } 397 } 398 399 400 // Implementation of call_VM versions 401 402 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) { 403 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 404 assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); 405 406 #ifndef AARCH64 407 // Safer to save R9 here since callers may have been written 408 // assuming R9 survives. This is suboptimal but is not worth 409 // optimizing for the few platforms where R9 is scratched. 410 push(RegisterSet(R4) | R9ifScratched); 411 mov(R4, SP); 412 bic(SP, SP, StackAlignmentInBytes - 1); 413 #endif // AARCH64 414 call(entry_point, relocInfo::runtime_call_type); 415 #ifndef AARCH64 416 mov(SP, R4); 417 pop(RegisterSet(R4) | R9ifScratched); 418 #endif // AARCH64 419 } 420 421 422 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 423 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 424 assert(number_of_arguments <= 3, "cannot have more than 3 arguments"); 425 426 const Register tmp = Rtemp; 427 assert_different_registers(oop_result, tmp); 428 429 set_last_Java_frame(SP, FP, true, tmp); 430 431 #ifdef ASSERT 432 AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); 433 #endif // ASSERT 434 435 #ifndef AARCH64 436 #if R9_IS_SCRATCHED 437 // Safer to save R9 here since callers may have been written 438 // assuming R9 survives. This is suboptimal but is not worth 439 // optimizing for the few platforms where R9 is scratched. 440 441 // Note: cannot save R9 above the saved SP (some calls expect for 442 // instance the Java stack top at the saved SP) 443 // => once saved (with set_last_Java_frame), decrease SP before rounding to 444 // ensure the slot at SP will be free for R9). 
445 sub(SP, SP, 4); 446 bic(SP, SP, StackAlignmentInBytes - 1); 447 str(R9, Address(SP, 0)); 448 #else 449 bic(SP, SP, StackAlignmentInBytes - 1); 450 #endif // R9_IS_SCRATCHED 451 #endif 452 453 mov(R0, Rthread); 454 call(entry_point, relocInfo::runtime_call_type); 455 456 #ifndef AARCH64 457 #if R9_IS_SCRATCHED 458 ldr(R9, Address(SP, 0)); 459 #endif 460 ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); 461 #endif 462 463 reset_last_Java_frame(tmp); 464 465 // C++ interp handles this in the interpreter 466 check_and_handle_popframe(); 467 check_and_handle_earlyret(); 468 469 if (check_exceptions) { 470 // check for pending exceptions 471 ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); 472 #ifdef AARCH64 473 Label L; 474 cbz(tmp, L); 475 mov_pc_to(Rexception_pc); 476 b(StubRoutines::forward_exception_entry()); 477 bind(L); 478 #else 479 cmp(tmp, 0); 480 mov(Rexception_pc, PC, ne); 481 b(StubRoutines::forward_exception_entry(), ne); 482 #endif // AARCH64 483 } 484 485 // get oop result if there is one and reset the value in the thread 486 if (oop_result->is_valid()) { 487 get_vm_result(oop_result, tmp); 488 } 489 } 490 491 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 492 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 493 } 494 495 496 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 497 assert (arg_1 == R1, "fixed register for arg_1"); 498 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 499 } 500 501 502 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 503 assert (arg_1 == R1, "fixed register for arg_1"); 504 assert (arg_2 == R2, "fixed register for arg_2"); 505 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 506 } 507 508 509 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 510 assert (arg_1 == R1, "fixed register for arg_1"); 511 assert (arg_2 == R2, "fixed register for arg_2"); 512 assert (arg_3 == R3, "fixed register for arg_3"); 513 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 514 } 515 516 517 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { 518 // Not used on ARM 519 Unimplemented(); 520 } 521 522 523 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 524 // Not used on ARM 525 Unimplemented(); 526 } 527 528 529 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 530 // Not used on ARM 531 Unimplemented(); 532 } 533 534 535 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 536 // Not used on ARM 537 Unimplemented(); 538 } 539 540 // Raw call, without saving/restoring registers, exception handling, etc. 541 // Mainly used from various stubs. 
542 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) { 543 const Register tmp = Rtemp; // Rtemp free since scratched by call 544 set_last_Java_frame(SP, FP, true, tmp); 545 #if R9_IS_SCRATCHED 546 if (save_R9_if_scratched) { 547 // Note: Saving also R10 for alignment. 548 push(RegisterSet(R9, R10)); 549 } 550 #endif 551 mov(R0, Rthread); 552 call(entry_point, relocInfo::runtime_call_type); 553 #if R9_IS_SCRATCHED 554 if (save_R9_if_scratched) { 555 pop(RegisterSet(R9, R10)); 556 } 557 #endif 558 reset_last_Java_frame(tmp); 559 } 560 561 void MacroAssembler::call_VM_leaf(address entry_point) { 562 call_VM_leaf_helper(entry_point, 0); 563 } 564 565 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 566 assert (arg_1 == R0, "fixed register for arg_1"); 567 call_VM_leaf_helper(entry_point, 1); 568 } 569 570 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 571 assert (arg_1 == R0, "fixed register for arg_1"); 572 assert (arg_2 == R1, "fixed register for arg_2"); 573 call_VM_leaf_helper(entry_point, 2); 574 } 575 576 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 577 assert (arg_1 == R0, "fixed register for arg_1"); 578 assert (arg_2 == R1, "fixed register for arg_2"); 579 assert (arg_3 == R2, "fixed register for arg_3"); 580 call_VM_leaf_helper(entry_point, 3); 581 } 582 583 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) { 584 assert (arg_1 == R0, "fixed register for arg_1"); 585 assert (arg_2 == R1, "fixed register for arg_2"); 586 assert (arg_3 == R2, "fixed register for arg_3"); 587 assert (arg_4 == R3, "fixed register for arg_4"); 588 call_VM_leaf_helper(entry_point, 4); 589 } 590 591 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) { 592 assert_different_registers(oop_result, tmp); 593 ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); 594 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); 595 verify_oop(oop_result); 596 } 597 598 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) { 599 assert_different_registers(metadata_result, tmp); 600 ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); 601 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); 602 } 603 604 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) { 605 if (arg2.is_register()) { 606 add(dst, arg1, arg2.as_register()); 607 } else { 608 add(dst, arg1, arg2.as_constant()); 609 } 610 } 611 612 void MacroAssembler::add_slow(Register rd, Register rn, int c) { 613 #ifdef AARCH64 614 if (c == 0) { 615 if (rd != rn) { 616 mov(rd, rn); 617 } 618 return; 619 } 620 if (c < 0) { 621 sub_slow(rd, rn, -c); 622 return; 623 } 624 if (c > right_n_bits(24)) { 625 guarantee(rd != rn, "no large add_slow with only one register"); 626 mov_slow(rd, c); 627 add(rd, rn, rd); 628 } else { 629 int lo = c & right_n_bits(12); 630 int hi = (c >> 12) & right_n_bits(12); 631 if (lo != 0) { 632 add(rd, rn, lo, lsl0); 633 } 634 if (hi != 0) { 635 add(rd, (lo == 0) ? 
                    rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in the compiler to handle large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
#ifdef AARCH64
  if (c <= 0) {
    add_slow(rd, rn, -c);
    return;
  }
  if (c > right_n_bits(24)) {
    guarantee(rd != rn, "no large sub_slow with only one register");
    mov_slow(rd, c);
    sub(rd, rn, rd);
  } else {
    int lo = c & right_n_bits(12);
    int hi = (c >> 12) & right_n_bits(12);
    if (lo != 0) {
      sub(rd, rn, lo, lsl0);
    }
    if (hi != 0) {
      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
    }
  }
#else
  // This function is used in the compiler to handle large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
#endif // AARCH64
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non-relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}

#ifdef AARCH64

// Common code for mov_slow and instr_count_for_mov_slow.
// Returns the number of instructions in the mov_slow pattern,
// generating it if a non-null MacroAssembler is given.
int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it at modifications.
715 716 const intx mask = right_n_bits(16); 717 // 1 movz instruction 718 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 719 if ((c & ~(mask << base_shift)) == 0) { 720 if (masm != NULL) { 721 masm->movz(rd, ((uintx)c) >> base_shift, base_shift); 722 } 723 return 1; 724 } 725 } 726 // 1 movn instruction 727 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 728 if (((~c) & ~(mask << base_shift)) == 0) { 729 if (masm != NULL) { 730 masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); 731 } 732 return 1; 733 } 734 } 735 // 1 orr instruction 736 { 737 LogicalImmediate imm(c, false); 738 if (imm.is_encoded()) { 739 if (masm != NULL) { 740 masm->orr(rd, ZR, imm); 741 } 742 return 1; 743 } 744 } 745 // 1 movz/movn + up to 3 movk instructions 746 int zeroes = 0; 747 int ones = 0; 748 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 749 int part = (c >> base_shift) & mask; 750 if (part == 0) { 751 ++zeroes; 752 } else if (part == mask) { 753 ++ones; 754 } 755 } 756 int def_bits = 0; 757 if (ones > zeroes) { 758 def_bits = mask; 759 } 760 int inst_count = 0; 761 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 762 int part = (c >> base_shift) & mask; 763 if (part != def_bits) { 764 if (masm != NULL) { 765 if (inst_count > 0) { 766 masm->movk(rd, part, base_shift); 767 } else { 768 if (def_bits == 0) { 769 masm->movz(rd, part, base_shift); 770 } else { 771 masm->movn(rd, ~part & mask, base_shift); 772 } 773 } 774 } 775 inst_count++; 776 } 777 } 778 assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); 779 return inst_count; 780 } 781 782 void MacroAssembler::mov_slow(Register rd, intptr_t c) { 783 #ifdef ASSERT 784 int off = offset(); 785 #endif 786 (void) mov_slow_helper(rd, c, this); 787 assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); 788 } 789 790 // Counts instructions generated by mov_slow(rd, c). 
791 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { 792 return mov_slow_helper(noreg, c, NULL); 793 } 794 795 int MacroAssembler::instr_count_for_mov_slow(address c) { 796 return mov_slow_helper(noreg, (intptr_t)c, NULL); 797 } 798 799 #else 800 801 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { 802 if (AsmOperand::is_rotated_imm(c)) { 803 mov(rd, c, cond); 804 } else if (AsmOperand::is_rotated_imm(~c)) { 805 mvn(rd, ~c, cond); 806 } else if (VM_Version::supports_movw()) { 807 movw(rd, c & 0xffff, cond); 808 if ((unsigned int)c >> 16) { 809 movt(rd, (unsigned int)c >> 16, cond); 810 } 811 } else { 812 // Find first non-zero bit 813 int shift = 0; 814 while ((c & (3 << shift)) == 0) { 815 shift += 2; 816 } 817 // Put the least significant part of the constant 818 int mask = 0xff << shift; 819 mov(rd, c & mask, cond); 820 // Add up to 3 other parts of the constant; 821 // each of them can be represented as rotated_imm 822 if (c & (mask << 8)) { 823 orr(rd, rd, c & (mask << 8), cond); 824 } 825 if (c & (mask << 16)) { 826 orr(rd, rd, c & (mask << 16), cond); 827 } 828 if (c & (mask << 24)) { 829 orr(rd, rd, c & (mask << 24), cond); 830 } 831 } 832 } 833 834 #endif // AARCH64 835 836 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, 837 #ifdef AARCH64 838 bool patchable 839 #else 840 AsmCondition cond 841 #endif 842 ) { 843 844 if (o == NULL) { 845 #ifdef AARCH64 846 if (patchable) { 847 nop(); 848 } 849 mov(rd, ZR); 850 #else 851 mov(rd, 0, cond); 852 #endif 853 return; 854 } 855 856 if (oop_index == 0) { 857 oop_index = oop_recorder()->allocate_oop_index(o); 858 } 859 relocate(oop_Relocation::spec(oop_index)); 860 861 #ifdef AARCH64 862 if (patchable) { 863 nop(); 864 } 865 ldr(rd, pc()); 866 #else 867 if (VM_Version::supports_movw()) { 868 movw(rd, 0, cond); 869 movt(rd, 0, cond); 870 } else { 871 ldr(rd, Address(PC), cond); 872 // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data). 873 nop(); 874 } 875 #endif 876 } 877 878 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { 879 if (o == NULL) { 880 #ifdef AARCH64 881 if (patchable) { 882 nop(); 883 } 884 #endif 885 mov(rd, 0); 886 return; 887 } 888 889 if (metadata_index == 0) { 890 metadata_index = oop_recorder()->allocate_metadata_index(o); 891 } 892 relocate(metadata_Relocation::spec(metadata_index)); 893 894 #ifdef AARCH64 895 if (patchable) { 896 nop(); 897 } 898 #ifdef COMPILER2 899 if (!patchable && VM_Version::prefer_moves_over_load_literal()) { 900 mov_slow(rd, (address)o); 901 return; 902 } 903 #endif 904 ldr(rd, pc()); 905 #else 906 if (VM_Version::supports_movw()) { 907 movw(rd, ((int)o) & 0xffff); 908 movt(rd, (unsigned int)o >> 16); 909 } else { 910 ldr(rd, Address(PC)); 911 // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). 
912 nop(); 913 } 914 #endif // AARCH64 915 } 916 917 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { 918 Label skip_constant; 919 union { 920 jfloat f; 921 jint i; 922 } accessor; 923 accessor.f = c; 924 925 #ifdef AARCH64 926 // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow 927 Label L; 928 ldr_s(fd, target(L)); 929 b(skip_constant); 930 bind(L); 931 emit_int32(accessor.i); 932 bind(skip_constant); 933 #else 934 flds(fd, Address(PC), cond); 935 b(skip_constant); 936 emit_int32(accessor.i); 937 bind(skip_constant); 938 #endif // AARCH64 939 } 940 941 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { 942 Label skip_constant; 943 union { 944 jdouble d; 945 jint i[2]; 946 } accessor; 947 accessor.d = c; 948 949 #ifdef AARCH64 950 // TODO-AARCH64 - try to optimize loading of double constants with fmov 951 Label L; 952 ldr_d(fd, target(L)); 953 b(skip_constant); 954 align(wordSize); 955 bind(L); 956 emit_int32(accessor.i[0]); 957 emit_int32(accessor.i[1]); 958 bind(skip_constant); 959 #else 960 fldd(fd, Address(PC), cond); 961 b(skip_constant); 962 emit_int32(accessor.i[0]); 963 emit_int32(accessor.i[1]); 964 bind(skip_constant); 965 #endif // AARCH64 966 } 967 968 void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) { 969 intptr_t addr = (intptr_t) address_of_global; 970 #ifdef AARCH64 971 assert((addr & 0x3) == 0, "address should be aligned"); 972 973 // FIXME: TODO 974 if (false && page_reachable_from_cache(address_of_global)) { 975 assert(false,"TODO: relocate"); 976 //relocate(); 977 adrp(reg, address_of_global); 978 ldrsw(reg, Address(reg, addr & 0xfff)); 979 } else { 980 mov_slow(reg, addr & ~0x3fff); 981 ldrsw(reg, Address(reg, addr & 0x3fff)); 982 } 983 #else 984 mov_slow(reg, addr & ~0xfff); 985 ldr(reg, Address(reg, addr & 0xfff)); 986 #endif 987 } 988 989 void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) { 990 #ifdef AARCH64 991 intptr_t addr = (intptr_t) address_of_global; 992 assert ((addr & 0x7) == 0, "address should be aligned"); 993 mov_slow(reg, addr & ~0x7fff); 994 ldr(reg, Address(reg, addr & 0x7fff)); 995 #else 996 ldr_global_s32(reg, address_of_global); 997 #endif 998 } 999 1000 void MacroAssembler::ldrb_global(Register reg, address address_of_global) { 1001 intptr_t addr = (intptr_t) address_of_global; 1002 mov_slow(reg, addr & ~0xfff); 1003 ldrb(reg, Address(reg, addr & 0xfff)); 1004 } 1005 1006 void MacroAssembler::zero_extend(Register rd, Register rn, int bits) { 1007 #ifdef AARCH64 1008 switch (bits) { 1009 case 8: uxtb(rd, rn); break; 1010 case 16: uxth(rd, rn); break; 1011 case 32: mov_w(rd, rn); break; 1012 default: ShouldNotReachHere(); 1013 } 1014 #else 1015 if (bits <= 8) { 1016 andr(rd, rn, (1 << bits) - 1); 1017 } else if (bits >= 24) { 1018 bic(rd, rn, -1 << bits); 1019 } else { 1020 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1021 mov(rd, AsmOperand(rd, lsr, 32 - bits)); 1022 } 1023 #endif 1024 } 1025 1026 void MacroAssembler::sign_extend(Register rd, Register rn, int bits) { 1027 #ifdef AARCH64 1028 switch (bits) { 1029 case 8: sxtb(rd, rn); break; 1030 case 16: sxth(rd, rn); break; 1031 case 32: sxtw(rd, rn); break; 1032 default: ShouldNotReachHere(); 1033 } 1034 #else 1035 mov(rd, AsmOperand(rn, lsl, 32 - bits)); 1036 mov(rd, AsmOperand(rd, asr, 32 - bits)); 1037 #endif 1038 } 1039 1040 #ifndef AARCH64 1041 1042 void MacroAssembler::long_move(Register rd_lo, Register rd_hi, 
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    // rd_lo == rn_hi and rd_hi == rn_lo: swap the two halves with the XOR trick (no temp register needed)
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
1139 if (!VerifyOops) return; 1140 1141 char buffer[64]; 1142 #ifdef COMPILER1 1143 if (CommentedAssembly) { 1144 snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); 1145 block_comment(buffer); 1146 } 1147 #endif 1148 const char* msg_buffer = NULL; 1149 { 1150 ResourceMark rm; 1151 stringStream ss; 1152 ss.print("%s at offset %d (%s:%d)", s, offset(), file, line); 1153 msg_buffer = code_string(ss.as_string()); 1154 } 1155 1156 save_all_registers(); 1157 1158 if (reg != R2) { 1159 mov(R2, reg); // oop to verify 1160 } 1161 mov(R1, SP); // register save area 1162 1163 Label done; 1164 InlinedString Lmsg(msg_buffer); 1165 ldr_literal(R0, Lmsg); // message 1166 1167 // call indirectly to solve generation ordering problem 1168 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1169 call(Rtemp); 1170 1171 restore_all_registers(); 1172 1173 b(done); 1174 #ifdef COMPILER2 1175 int off = offset(); 1176 #endif 1177 bind_literal(Lmsg); 1178 #ifdef COMPILER2 1179 if (offset() - off == 1 * wordSize) { 1180 // no padding, so insert nop for worst-case sizing 1181 nop(); 1182 } 1183 #endif 1184 bind(done); 1185 } 1186 1187 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { 1188 if (!VerifyOops) return; 1189 1190 const char* msg_buffer = NULL; 1191 { 1192 ResourceMark rm; 1193 stringStream ss; 1194 if ((addr.base() == SP) && (addr.index()==noreg)) { 1195 ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s); 1196 } else { 1197 ss.print("verify_oop_addr: %s", s); 1198 } 1199 ss.print(" (%s:%d)", file, line); 1200 msg_buffer = code_string(ss.as_string()); 1201 } 1202 1203 int push_size = save_all_registers(); 1204 1205 if (addr.base() == SP) { 1206 // computes an addr that takes into account the push 1207 if (addr.index() != noreg) { 1208 Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index 1209 add(new_base, SP, push_size); 1210 addr = addr.rebase(new_base); 1211 } else { 1212 addr = addr.plus_disp(push_size); 1213 } 1214 } 1215 1216 ldr(R2, addr); // oop to verify 1217 mov(R1, SP); // register save area 1218 1219 Label done; 1220 InlinedString Lmsg(msg_buffer); 1221 ldr_literal(R0, Lmsg); // message 1222 1223 // call indirectly to solve generation ordering problem 1224 ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address()); 1225 call(Rtemp); 1226 1227 restore_all_registers(); 1228 1229 b(done); 1230 bind_literal(Lmsg); 1231 bind(done); 1232 } 1233 1234 void MacroAssembler::null_check(Register reg, Register tmp, int offset) { 1235 if (needs_explicit_null_check(offset)) { 1236 #ifdef AARCH64 1237 ldr(ZR, Address(reg)); 1238 #else 1239 assert_different_registers(reg, tmp); 1240 if (tmp == noreg) { 1241 tmp = Rtemp; 1242 assert((! Thread::current()->is_Compiler_thread()) || 1243 (! (ciEnv::current()->task() == NULL)) || 1244 (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)), 1245 "Rtemp not available in C2"); // explicit tmp register required 1246 // XXX: could we mark the code buffer as not compatible with C2 ? 1247 } 1248 ldr(tmp, Address(reg)); 1249 #endif 1250 } 1251 } 1252 1253 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 
1254 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2, 1255 RegisterOrConstant size_expression, Label& slow_case) { 1256 if (!Universe::heap()->supports_inline_contig_alloc()) { 1257 b(slow_case); 1258 return; 1259 } 1260 1261 CollectedHeap* ch = Universe::heap(); 1262 1263 const Register top_addr = tmp1; 1264 const Register heap_end = tmp2; 1265 1266 if (size_expression.is_register()) { 1267 assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register()); 1268 } else { 1269 assert_different_registers(obj, obj_end, top_addr, heap_end); 1270 } 1271 1272 bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance 1273 if (load_const) { 1274 mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference); 1275 } else { 1276 ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset())); 1277 } 1278 // Calculate new heap_top by adding the size of the object 1279 Label retry; 1280 bind(retry); 1281 1282 #ifdef AARCH64 1283 ldxr(obj, top_addr); 1284 #else 1285 ldr(obj, Address(top_addr)); 1286 #endif // AARCH64 1287 1288 ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr())); 1289 add_rc(obj_end, obj, size_expression); 1290 // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case. 1291 cmp(obj_end, obj); 1292 b(slow_case, lo); 1293 // Update heap_top if allocation succeeded 1294 cmp(obj_end, heap_end); 1295 b(slow_case, hi); 1296 1297 #ifdef AARCH64 1298 stxr(heap_end/*scratched*/, obj_end, top_addr); 1299 cbnz_w(heap_end, retry); 1300 #else 1301 atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/); 1302 b(retry, ne); 1303 #endif // AARCH64 1304 } 1305 1306 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`. 1307 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1, 1308 RegisterOrConstant size_expression, Label& slow_case) { 1309 const Register tlab_end = tmp1; 1310 assert_different_registers(obj, obj_end, tlab_end); 1311 1312 ldr(obj, Address(Rthread, JavaThread::tlab_top_offset())); 1313 ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset())); 1314 add_rc(obj_end, obj, size_expression); 1315 cmp(obj_end, tlab_end); 1316 b(slow_case, hi); 1317 str(obj_end, Address(Rthread, JavaThread::tlab_top_offset())); 1318 } 1319 1320 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers. 1321 void MacroAssembler::zero_memory(Register start, Register end, Register tmp) { 1322 Label loop; 1323 const Register ptr = start; 1324 1325 #ifdef AARCH64 1326 // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x 1327 const Register size = tmp; 1328 Label remaining, done; 1329 1330 sub(size, end, start); 1331 1332 #ifdef ASSERT 1333 { Label L; 1334 tst(size, wordSize - 1); 1335 b(L, eq); 1336 stop("size is not a multiple of wordSize"); 1337 bind(L); 1338 } 1339 #endif // ASSERT 1340 1341 subs(size, size, wordSize); 1342 b(remaining, le); 1343 1344 // Zero by 2 words per iteration. 
1345 bind(loop); 1346 subs(size, size, 2*wordSize); 1347 stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed)); 1348 b(loop, gt); 1349 1350 bind(remaining); 1351 b(done, ne); 1352 str(ZR, Address(ptr)); 1353 bind(done); 1354 #else 1355 mov(tmp, 0); 1356 bind(loop); 1357 cmp(ptr, end); 1358 str(tmp, Address(ptr, wordSize, post_indexed), lo); 1359 b(loop, lo); 1360 #endif // AARCH64 1361 } 1362 1363 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) { 1364 #ifdef AARCH64 1365 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1366 add_rc(tmp, tmp, size_in_bytes); 1367 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1368 #else 1369 // Bump total bytes allocated by this thread 1370 Label done; 1371 1372 ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1373 adds(tmp, tmp, size_in_bytes); 1374 str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc); 1375 b(done, cc); 1376 1377 // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated) 1378 // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by 1379 // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself. 1380 Register low, high; 1381 // Select ether R0/R1 or R2/R3 1382 1383 if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) { 1384 low = R2; 1385 high = R3; 1386 } else { 1387 low = R0; 1388 high = R1; 1389 } 1390 push(RegisterSet(low, high)); 1391 1392 ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1393 adds(low, low, size_in_bytes); 1394 adc(high, high, 0); 1395 strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset()))); 1396 1397 pop(RegisterSet(low, high)); 1398 1399 bind(done); 1400 #endif // AARCH64 1401 } 1402 1403 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { 1404 // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM 1405 if (UseStackBanging) { 1406 const int page_size = os::vm_page_size(); 1407 1408 sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size()); 1409 strb(R0, Address(tmp)); 1410 #ifdef AARCH64 1411 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) { 1412 sub(tmp, tmp, page_size); 1413 strb(R0, Address(tmp)); 1414 } 1415 #else 1416 for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { 1417 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1418 } 1419 #endif // AARCH64 1420 } 1421 } 1422 1423 void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) { 1424 if (UseStackBanging) { 1425 Label loop; 1426 1427 mov(tmp, SP); 1428 add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size()); 1429 #ifdef AARCH64 1430 sub(tmp, tmp, Rsize); 1431 bind(loop); 1432 subs(Rsize, Rsize, os::vm_page_size()); 1433 strb(ZR, Address(tmp, Rsize)); 1434 #else 1435 bind(loop); 1436 subs(Rsize, Rsize, 0xff0); 1437 strb(R0, Address(tmp, -0xff0, pre_indexed)); 1438 #endif // AARCH64 1439 b(loop, hi); 1440 } 1441 } 1442 1443 void MacroAssembler::stop(const char* msg) { 1444 // This code pattern is matched in NativeIntruction::is_stop. 1445 // Update it at modifications. 
1446 #ifdef COMPILER1 1447 if (CommentedAssembly) { 1448 block_comment("stop"); 1449 } 1450 #endif 1451 1452 InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug)); 1453 InlinedString Lmsg(msg); 1454 1455 // save all registers for further inspection 1456 save_all_registers(); 1457 1458 ldr_literal(R0, Lmsg); // message 1459 mov(R1, SP); // register save area 1460 1461 #ifdef AARCH64 1462 ldr_literal(Rtemp, Ldebug); 1463 br(Rtemp); 1464 #else 1465 ldr_literal(PC, Ldebug); // call MacroAssembler::debug 1466 #endif // AARCH64 1467 1468 #if defined(COMPILER2) && defined(AARCH64) 1469 int off = offset(); 1470 #endif 1471 bind_literal(Lmsg); 1472 bind_literal(Ldebug); 1473 #if defined(COMPILER2) && defined(AARCH64) 1474 if (offset() - off == 2 * wordSize) { 1475 // no padding, so insert nop for worst-case sizing 1476 nop(); 1477 } 1478 #endif 1479 } 1480 1481 void MacroAssembler::warn(const char* msg) { 1482 #ifdef COMPILER1 1483 if (CommentedAssembly) { 1484 block_comment("warn"); 1485 } 1486 #endif 1487 1488 InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning)); 1489 InlinedString Lmsg(msg); 1490 Label done; 1491 1492 int push_size = save_caller_save_registers(); 1493 1494 #ifdef AARCH64 1495 // TODO-AARCH64 - get rid of extra debug parameters 1496 mov(R1, LR); 1497 mov(R2, FP); 1498 add(R3, SP, push_size); 1499 #endif 1500 1501 ldr_literal(R0, Lmsg); // message 1502 ldr_literal(LR, Lwarn); // call warning 1503 1504 call(LR); 1505 1506 restore_caller_save_registers(); 1507 1508 b(done); 1509 bind_literal(Lmsg); 1510 bind_literal(Lwarn); 1511 bind(done); 1512 } 1513 1514 1515 int MacroAssembler::save_all_registers() { 1516 // This code pattern is matched in NativeIntruction::is_save_all_registers. 1517 // Update it at modifications. 1518 #ifdef AARCH64 1519 const Register tmp = Rtemp; 1520 raw_push(R30, ZR); 1521 for (int i = 28; i >= 0; i -= 2) { 1522 raw_push(as_Register(i), as_Register(i+1)); 1523 } 1524 mov_pc_to(tmp); 1525 str(tmp, Address(SP, 31*wordSize)); 1526 ldr(tmp, Address(SP, tmp->encoding()*wordSize)); 1527 return 32*wordSize; 1528 #else 1529 push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC)); 1530 return 15*wordSize; 1531 #endif // AARCH64 1532 } 1533 1534 void MacroAssembler::restore_all_registers() { 1535 #ifdef AARCH64 1536 for (int i = 0; i <= 28; i += 2) { 1537 raw_pop(as_Register(i), as_Register(i+1)); 1538 } 1539 raw_pop(R30, ZR); 1540 #else 1541 pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers 1542 add(SP, SP, wordSize); // discard saved PC 1543 #endif // AARCH64 1544 } 1545 1546 int MacroAssembler::save_caller_save_registers() { 1547 #ifdef AARCH64 1548 for (int i = 0; i <= 16; i += 2) { 1549 raw_push(as_Register(i), as_Register(i+1)); 1550 } 1551 raw_push(R18, LR); 1552 return 20*wordSize; 1553 #else 1554 #if R9_IS_SCRATCHED 1555 // Save also R10 to preserve alignment 1556 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1557 return 8*wordSize; 1558 #else 1559 push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR)); 1560 return 6*wordSize; 1561 #endif 1562 #endif // AARCH64 1563 } 1564 1565 void MacroAssembler::restore_caller_save_registers() { 1566 #ifdef AARCH64 1567 raw_pop(R18, LR); 1568 for (int i = 16; i >= 0; i -= 2) { 1569 raw_pop(as_Register(i), as_Register(i+1)); 1570 } 1571 #else 1572 #if R9_IS_SCRATCHED 1573 pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10)); 1574 #else 1575 pop(RegisterSet(R0, R3) | RegisterSet(R12) | 
RegisterSet(LR)); 1576 #endif 1577 #endif // AARCH64 1578 } 1579 1580 void MacroAssembler::debug(const char* msg, const intx* registers) { 1581 // In order to get locks to work, we need to fake a in_VM state 1582 JavaThread* thread = JavaThread::current(); 1583 thread->set_thread_state(_thread_in_vm); 1584 1585 if (ShowMessageBoxOnError) { 1586 ttyLocker ttyl; 1587 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 1588 BytecodeCounter::print(); 1589 } 1590 if (os::message_box(msg, "Execution stopped, print registers?")) { 1591 #ifdef AARCH64 1592 // saved registers: R0-R30, PC 1593 const int nregs = 32; 1594 #else 1595 // saved registers: R0-R12, LR, PC 1596 const int nregs = 15; 1597 const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC}; 1598 #endif // AARCH64 1599 1600 for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) { 1601 tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]); 1602 } 1603 1604 #ifdef AARCH64 1605 tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]); 1606 #endif // AARCH64 1607 1608 // derive original SP value from the address of register save area 1609 tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(®isters[nregs])); 1610 } 1611 BREAKPOINT; 1612 } else { 1613 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 1614 } 1615 assert(false, "DEBUG MESSAGE: %s", msg); 1616 fatal("%s", msg); // returning from MacroAssembler::debug is not supported 1617 } 1618 1619 void MacroAssembler::unimplemented(const char* what) { 1620 const char* buf = NULL; 1621 { 1622 ResourceMark rm; 1623 stringStream ss; 1624 ss.print("unimplemented: %s", what); 1625 buf = code_string(ss.as_string()); 1626 } 1627 stop(buf); 1628 } 1629 1630 1631 // Implementation of FixedSizeCodeBlock 1632 1633 FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) : 1634 _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) { 1635 } 1636 1637 FixedSizeCodeBlock::~FixedSizeCodeBlock() { 1638 if (_enabled) { 1639 address curr_pc = _masm->pc(); 1640 1641 assert(_start < curr_pc, "invalid current pc"); 1642 guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long"); 1643 1644 int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs; 1645 for (int i = 0; i < nops_count; i++) { 1646 _masm->nop(); 1647 } 1648 } 1649 } 1650 1651 #ifdef AARCH64 1652 1653 // Serializes memory. 1654 // tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM 1655 void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) { 1656 if (!os::is_MP()) return; 1657 1658 // TODO-AARCH64 investigate dsb vs dmb effects 1659 if (order_constraint == StoreStore) { 1660 dmb(DMB_st); 1661 } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) { 1662 dmb(DMB_ld); 1663 } else { 1664 dmb(DMB_all); 1665 } 1666 } 1667 1668 #else 1669 1670 // Serializes memory. Potentially blows flags and reg. 1671 // tmp is a scratch for v6 co-processor write op (could be noreg for other architecure versions) 1672 // preserve_flags takes a longer path in LoadStore case (dmb rather then control dependency) to preserve status flags. Optional. 1673 // load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional. 
1674 void MacroAssembler::membar(Membar_mask_bits order_constraint, 1675 Register tmp, 1676 bool preserve_flags, 1677 Register load_tgt) { 1678 if (!os::is_MP()) return; 1679 1680 if (order_constraint == StoreStore) { 1681 dmb(DMB_st, tmp); 1682 } else if ((order_constraint & StoreLoad) || 1683 (order_constraint & LoadLoad) || 1684 (order_constraint & StoreStore) || 1685 (load_tgt == noreg) || 1686 preserve_flags) { 1687 dmb(DMB_all, tmp); 1688 } else { 1689 // LoadStore: speculative stores reordeing is prohibited 1690 1691 // By providing an ordered load target register, we avoid an extra memory load reference 1692 Label not_taken; 1693 bind(not_taken); 1694 cmp(load_tgt, load_tgt); 1695 b(not_taken, ne); 1696 } 1697 } 1698 1699 #endif // AARCH64 1700 1701 // If "allow_fallthrough_on_failure" is false, we always branch to "slow_case" 1702 // on failure, so fall-through can only mean success. 1703 // "one_shot" controls whether we loop and retry to mitigate spurious failures. 1704 // This is only needed for C2, which for some reason does not rety, 1705 // while C1/interpreter does. 1706 // TODO: measure if it makes a difference 1707 1708 void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, 1709 Register base, Register tmp, Label &slow_case, 1710 bool allow_fallthrough_on_failure, bool one_shot) 1711 { 1712 1713 bool fallthrough_is_success = false; 1714 1715 // ARM Litmus Test example does prefetching here. 1716 // TODO: investigate if it helps performance 1717 1718 // The last store was to the displaced header, so to prevent 1719 // reordering we must issue a StoreStore or Release barrier before 1720 // the CAS store. 1721 1722 #ifdef AARCH64 1723 1724 Register Rscratch = tmp; 1725 Register Roop = base; 1726 Register mark = oldval; 1727 Register Rbox = newval; 1728 Label loop; 1729 1730 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1731 1732 // Instead of StoreStore here, we use store-release-exclusive below 1733 1734 bind(loop); 1735 1736 ldaxr(tmp, base); // acquire 1737 cmp(tmp, oldval); 1738 b(slow_case, ne); 1739 stlxr(tmp, newval, base); // release 1740 if (one_shot) { 1741 cmp_w(tmp, 0); 1742 } else { 1743 cbnz_w(tmp, loop); 1744 fallthrough_is_success = true; 1745 } 1746 1747 // MemBarAcquireLock would normally go here, but 1748 // we already do ldaxr+stlxr above, which has 1749 // Sequential Consistency 1750 1751 #else 1752 membar(MacroAssembler::StoreStore, noreg); 1753 1754 if (one_shot) { 1755 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1756 cmp(tmp, oldval); 1757 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1758 cmp(tmp, 0, eq); 1759 } else { 1760 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1761 } 1762 1763 // MemBarAcquireLock barrier 1764 // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, 1765 // but that doesn't prevent a load or store from floating up between 1766 // the load and store in the CAS sequence, so play it safe and 1767 // do a full fence. 
1768 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); 1769 #endif 1770 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1771 b(slow_case, ne); 1772 } 1773 } 1774 1775 void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, 1776 Register base, Register tmp, Label &slow_case, 1777 bool allow_fallthrough_on_failure, bool one_shot) 1778 { 1779 1780 bool fallthrough_is_success = false; 1781 1782 assert_different_registers(oldval,newval,base,tmp); 1783 1784 #ifdef AARCH64 1785 Label loop; 1786 1787 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); 1788 1789 bind(loop); 1790 ldxr(tmp, base); 1791 cmp(tmp, oldval); 1792 b(slow_case, ne); 1793 // MemBarReleaseLock barrier 1794 stlxr(tmp, newval, base); 1795 if (one_shot) { 1796 cmp_w(tmp, 0); 1797 } else { 1798 cbnz_w(tmp, loop); 1799 fallthrough_is_success = true; 1800 } 1801 #else 1802 // MemBarReleaseLock barrier 1803 // According to JSR-133 Cookbook, this should be StoreStore | LoadStore, 1804 // but that doesn't prevent a load or store from floating down between 1805 // the load and store in the CAS sequence, so play it safe and 1806 // do a full fence. 1807 membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp); 1808 1809 if (one_shot) { 1810 ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes())); 1811 cmp(tmp, oldval); 1812 strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq); 1813 cmp(tmp, 0, eq); 1814 } else { 1815 atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); 1816 } 1817 #endif 1818 if (!fallthrough_is_success && !allow_fallthrough_on_failure) { 1819 b(slow_case, ne); 1820 } 1821 1822 // ExitEnter 1823 // According to JSR-133 Cookbook, this should be StoreLoad, the same 1824 // barrier that follows volatile store. 1825 // TODO: Should be able to remove on armv8 if volatile loads 1826 // use the load-acquire instruction. 1827 membar(StoreLoad, noreg); 1828 } 1829 1830 #ifndef PRODUCT 1831 1832 // Preserves flags and all registers. 1833 // On SMP the updated value might not be visible to external observers without a sychronization barrier 1834 void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) { 1835 if (counter_addr != NULL) { 1836 InlinedAddress counter_addr_literal((address)counter_addr); 1837 Label done, retry; 1838 if (cond != al) { 1839 b(done, inverse(cond)); 1840 } 1841 1842 #ifdef AARCH64 1843 raw_push(R0, R1); 1844 raw_push(R2, ZR); 1845 1846 ldr_literal(R0, counter_addr_literal); 1847 1848 bind(retry); 1849 ldxr_w(R1, R0); 1850 add_w(R1, R1, 1); 1851 stxr_w(R2, R1, R0); 1852 cbnz_w(R2, retry); 1853 1854 raw_pop(R2, ZR); 1855 raw_pop(R0, R1); 1856 #else 1857 push(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1858 ldr_literal(R0, counter_addr_literal); 1859 1860 mrs(CPSR, Rtemp); 1861 1862 bind(retry); 1863 ldr_s32(R1, Address(R0)); 1864 add(R2, R1, 1); 1865 atomic_cas_bool(R1, R2, R0, 0, R3); 1866 b(retry, ne); 1867 1868 msr(CPSR_fsxc, Rtemp); 1869 1870 pop(RegisterSet(R0, R3) | RegisterSet(Rtemp)); 1871 #endif // AARCH64 1872 1873 b(done); 1874 bind_literal(counter_addr_literal); 1875 1876 bind(done); 1877 } 1878 } 1879 1880 #endif // !PRODUCT 1881 1882 1883 // Building block for CAS cases of biased locking: makes CAS and records statistics. 1884 // The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set. 
1885 void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1886                                                    Register tmp, Label& slow_case, int* counter_addr) {
1887
1888   cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1889 #ifdef ASSERT
1890   breakpoint(ne); // Fallthrough only on success
1891 #endif
1892 #ifndef PRODUCT
1893   if (counter_addr != NULL) {
1894     cond_atomic_inc32(al, counter_addr);
1895   }
1896 #endif // !PRODUCT
1897 }
1898
1899 int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1900                                          bool swap_reg_contains_mark,
1901                                          Register tmp2,
1902                                          Label& done, Label& slow_case,
1903                                          BiasedLockingCounters* counters) {
1904   // obj_reg must be preserved (at least) if biased locking fails
1905   // tmp_reg is a temporary register
1906   // swap_reg was used as a temporary but contained a value
1907   //   that was used afterwards in some call paths. Callers
1908   //   have been fixed so that swap_reg no longer needs to be
1909   //   saved.
1910   // Rtemp is no longer scratched
1911
1912   assert(UseBiasedLocking, "why call this otherwise?");
1913   assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
1914   guarantee(swap_reg != tmp_reg, "invariant");
1915   assert(tmp_reg != noreg, "must supply tmp_reg");
1916
1917 #ifndef PRODUCT
1918   if (PrintBiasedLockingStatistics && (counters == NULL)) {
1919     counters = BiasedLocking::counters();
1920   }
1921 #endif
1922
1923   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1924   Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
1925
1926   // Biased locking
1927   // See whether the lock is currently biased toward our thread and
1928   // whether the epoch is still valid
1929   // Note that the runtime guarantees sufficient alignment of JavaThread
1930   // pointers to allow age to be placed into low bits
1931   // First check to see whether biasing is even enabled for this object
1932   Label cas_label;
1933
1934   // The null check applies to the mark loading, if we need to load it.
1935   // If the mark has already been loaded in swap_reg then the null check
1936   // has already been performed and the offset is irrelevant.
1937   int null_check_offset = offset();
1938   if (!swap_reg_contains_mark) {
1939     ldr(swap_reg, mark_addr);
1940   }
1941
1942   // On MP platform loads could return 'stale' values in some cases.
1943   // That is acceptable since either CAS or slow case path is taken in the worst case.
1944
1945   andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
1946   cmp(tmp_reg, markOopDesc::biased_lock_pattern);
1947
1948   b(cas_label, ne);
1949
1950   // The bias pattern is present in the object's header. Need to check
1951   // whether the bias owner and the epoch are both still current.
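  // The sequence below computes
  //   tmp_reg = (klass->prototype_header() | Rthread) ^ mark_word
  // and then clears the age bits while setting the flags; a zero result (eq)
  // means the object is already biased to this thread in the current epoch.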
1952 load_klass(tmp_reg, obj_reg); 1953 ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 1954 orr(tmp_reg, tmp_reg, Rthread); 1955 eor(tmp_reg, tmp_reg, swap_reg); 1956 1957 #ifdef AARCH64 1958 ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place)); 1959 #else 1960 bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); 1961 #endif // AARCH64 1962 1963 #ifndef PRODUCT 1964 if (counters != NULL) { 1965 cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr()); 1966 } 1967 #endif // !PRODUCT 1968 1969 b(done, eq); 1970 1971 Label try_revoke_bias; 1972 Label try_rebias; 1973 1974 // At this point we know that the header has the bias pattern and 1975 // that we are not the bias owner in the current epoch. We need to 1976 // figure out more details about the state of the header in order to 1977 // know what operations can be legally performed on the object's 1978 // header. 1979 1980 // If the low three bits in the xor result aren't clear, that means 1981 // the prototype header is no longer biased and we have to revoke 1982 // the bias on this object. 1983 tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 1984 b(try_revoke_bias, ne); 1985 1986 // Biasing is still enabled for this data type. See whether the 1987 // epoch of the current bias is still valid, meaning that the epoch 1988 // bits of the mark word are equal to the epoch bits of the 1989 // prototype header. (Note that the prototype header's epoch bits 1990 // only change at a safepoint.) If not, attempt to rebias the object 1991 // toward the current thread. Note that we must be absolutely sure 1992 // that the current epoch is invalid in order to do this because 1993 // otherwise the manipulations it performs on the mark word are 1994 // illegal. 1995 tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place); 1996 b(try_rebias, ne); 1997 1998 // tmp_reg has the age, epoch and pattern bits cleared 1999 // The remaining (owner) bits are (Thread ^ current_owner) 2000 2001 // The epoch of the current bias is still valid but we know nothing 2002 // about the owner; it might be set or it might be clear. Try to 2003 // acquire the bias of the object using an atomic operation. If this 2004 // fails we will go in to the runtime to revoke the object's bias. 2005 // Note that we first construct the presumed unbiased header so we 2006 // don't accidentally blow away another thread's valid bias. 2007 2008 // Note that we know the owner is not ourself. Hence, success can 2009 // only happen when the owner bits is 0 2010 2011 #ifdef AARCH64 2012 // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has 2013 // cleared bit in the middle (cms bit). So it is loaded with separate instruction. 
2014 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2015 andr(swap_reg, swap_reg, tmp2); 2016 #else 2017 // until the assembler can be made smarter, we need to make some assumptions about the values 2018 // so we can optimize this: 2019 assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed"); 2020 2021 mov(swap_reg, AsmOperand(swap_reg, lsl, 23)); 2022 mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS) 2023 #endif // AARCH64 2024 2025 orr(tmp_reg, swap_reg, Rthread); // new mark 2026 2027 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2028 (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL); 2029 2030 // If the biasing toward our thread failed, this means that 2031 // another thread succeeded in biasing it toward itself and we 2032 // need to revoke that bias. The revocation will occur in the 2033 // interpreter runtime in the slow case. 2034 2035 b(done); 2036 2037 bind(try_rebias); 2038 2039 // At this point we know the epoch has expired, meaning that the 2040 // current "bias owner", if any, is actually invalid. Under these 2041 // circumstances _only_, we are allowed to use the current header's 2042 // value as the comparison value when doing the cas to acquire the 2043 // bias in the current epoch. In other words, we allow transfer of 2044 // the bias from one thread to another directly in this situation. 2045 2046 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2047 2048 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2049 2050 // owner bits 'random'. Set them to Rthread. 2051 #ifdef AARCH64 2052 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2053 andr(tmp_reg, tmp_reg, tmp2); 2054 #else 2055 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2056 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2057 #endif // AARCH64 2058 2059 orr(tmp_reg, tmp_reg, Rthread); // new mark 2060 2061 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case, 2062 (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL); 2063 2064 // If the biasing toward our thread failed, then another thread 2065 // succeeded in biasing it toward itself and we need to revoke that 2066 // bias. The revocation will occur in the runtime in the slow case. 2067 2068 b(done); 2069 2070 bind(try_revoke_bias); 2071 2072 // The prototype mark in the klass doesn't have the bias bit set any 2073 // more, indicating that objects of this data type are not supposed 2074 // to be biased any more. We are going to try to reset the mark of 2075 // this object to the prototype value and fall through to the 2076 // CAS-based locking scheme. Note that if our CAS fails, it means 2077 // that another thread raced us for the privilege of revoking the 2078 // bias of this particular object, so it's okay to continue in the 2079 // normal locking code. 2080 2081 // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg) 2082 2083 eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !) 2084 2085 // owner bits 'random'. 
Clear them 2086 #ifdef AARCH64 2087 mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); 2088 andr(tmp_reg, tmp_reg, tmp2); 2089 #else 2090 mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23)); 2091 mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23)); 2092 #endif // AARCH64 2093 2094 biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label, 2095 (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL); 2096 2097 // Fall through to the normal CAS-based lock, because no matter what 2098 // the result of the above CAS, some thread must have succeeded in 2099 // removing the bias bit from the object's header. 2100 2101 bind(cas_label); 2102 2103 return null_check_offset; 2104 } 2105 2106 2107 void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) { 2108 assert(UseBiasedLocking, "why call this otherwise?"); 2109 2110 // Check for biased locking unlock case, which is a no-op 2111 // Note: we do not have to check the thread ID for two reasons. 2112 // First, the interpreter checks for IllegalMonitorStateException at 2113 // a higher level. Second, if the bias was revoked while we held the 2114 // lock, the object could not be rebiased toward another thread, so 2115 // the bias bit would be clear. 2116 ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 2117 2118 andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place); 2119 cmp(tmp_reg, markOopDesc::biased_lock_pattern); 2120 b(done, eq); 2121 } 2122 2123 2124 void MacroAssembler::resolve_jobject(Register value, 2125 Register tmp1, 2126 Register tmp2) { 2127 assert_different_registers(value, tmp1, tmp2); 2128 Label done, not_weak; 2129 cbz(value, done); // Use NULL as-is. 2130 STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); 2131 tbz(value, 0, not_weak); // Test for jweak tag. 2132 // Resolve jweak. 2133 ldr(value, Address(value, -JNIHandles::weak_tag_value)); 2134 verify_oop(value); 2135 #if INCLUDE_ALL_GCS 2136 if (UseG1GC) { 2137 g1_write_barrier_pre(noreg, // store_addr 2138 noreg, // new_val 2139 value, // pre_val 2140 tmp1, // tmp1 2141 tmp2); // tmp2 2142 } 2143 #endif // INCLUDE_ALL_GCS 2144 b(done); 2145 bind(not_weak); 2146 // Resolve (untagged) jobject. 2147 ldr(value, Address(value)); 2148 verify_oop(value); 2149 bind(done); 2150 } 2151 2152 2153 ////////////////////////////////////////////////////////////////////////////////// 2154 2155 #if INCLUDE_ALL_GCS 2156 2157 // G1 pre-barrier. 2158 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). 2159 // If store_addr != noreg, then previous value is loaded from [store_addr]; 2160 // in such case store_addr and new_val registers are preserved; 2161 // otherwise pre_val register is preserved. 
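// A typical call is the jweak case in resolve_jobject() above, where pre_val is
// already loaded and store_addr/new_val are therefore noreg:
//   g1_write_barrier_pre(noreg, noreg, value, tmp1, tmp2);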
2162 void MacroAssembler::g1_write_barrier_pre(Register store_addr, 2163 Register new_val, 2164 Register pre_val, 2165 Register tmp1, 2166 Register tmp2) { 2167 Label done; 2168 Label runtime; 2169 2170 if (store_addr != noreg) { 2171 assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg); 2172 } else { 2173 assert (new_val == noreg, "should be"); 2174 assert_different_registers(pre_val, tmp1, tmp2, noreg); 2175 } 2176 2177 Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + 2178 SATBMarkQueue::byte_offset_of_active())); 2179 Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + 2180 SATBMarkQueue::byte_offset_of_index())); 2181 Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() + 2182 SATBMarkQueue::byte_offset_of_buf())); 2183 2184 // Is marking active? 2185 assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); 2186 ldrb(tmp1, in_progress); 2187 cbz(tmp1, done); 2188 2189 // Do we need to load the previous value? 2190 if (store_addr != noreg) { 2191 load_heap_oop(pre_val, Address(store_addr, 0)); 2192 } 2193 2194 // Is the previous value null? 2195 cbz(pre_val, done); 2196 2197 // Can we store original value in the thread's buffer? 2198 // Is index == 0? 2199 // (The index field is typed as size_t.) 2200 2201 ldr(tmp1, index); // tmp1 := *index_adr 2202 ldr(tmp2, buffer); 2203 2204 subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize 2205 b(runtime, lt); // If negative, goto runtime 2206 2207 str(tmp1, index); // *index_adr := tmp1 2208 2209 // Record the previous value 2210 str(pre_val, Address(tmp2, tmp1)); 2211 b(done); 2212 2213 bind(runtime); 2214 2215 // save the live input values 2216 #ifdef AARCH64 2217 if (store_addr != noreg) { 2218 raw_push(store_addr, new_val); 2219 } else { 2220 raw_push(pre_val, ZR); 2221 } 2222 #else 2223 if (store_addr != noreg) { 2224 // avoid raw_push to support any ordering of store_addr and new_val 2225 push(RegisterSet(store_addr) | RegisterSet(new_val)); 2226 } else { 2227 push(pre_val); 2228 } 2229 #endif // AARCH64 2230 2231 if (pre_val != R0) { 2232 mov(R0, pre_val); 2233 } 2234 mov(R1, Rthread); 2235 2236 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); 2237 2238 #ifdef AARCH64 2239 if (store_addr != noreg) { 2240 raw_pop(store_addr, new_val); 2241 } else { 2242 raw_pop(pre_val, ZR); 2243 } 2244 #else 2245 if (store_addr != noreg) { 2246 pop(RegisterSet(store_addr) | RegisterSet(new_val)); 2247 } else { 2248 pop(pre_val); 2249 } 2250 #endif // AARCH64 2251 2252 bind(done); 2253 } 2254 2255 // G1 post-barrier. 2256 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). 2257 void MacroAssembler::g1_write_barrier_post(Register store_addr, 2258 Register new_val, 2259 Register tmp1, 2260 Register tmp2, 2261 Register tmp3) { 2262 2263 Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + 2264 DirtyCardQueue::byte_offset_of_index())); 2265 Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + 2266 DirtyCardQueue::byte_offset_of_buf())); 2267 2268 BarrierSet* bs = Universe::heap()->barrier_set(); 2269 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs); 2270 CardTable* ct = ctbs->card_table(); 2271 Label done; 2272 Label runtime; 2273 2274 // Does store cross heap regions? 
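  // i.e. the store crosses regions iff
  //   ((store_addr ^ new_val) >> HeapRegion::LogOfHRGrainBytes) != 0,
  // which the eor + logical shift below compute, branching to 'done' when the
  // result is zero (same region).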
2275 2276 eor(tmp1, store_addr, new_val); 2277 #ifdef AARCH64 2278 logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); 2279 cbz(tmp1, done); 2280 #else 2281 movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); 2282 b(done, eq); 2283 #endif 2284 2285 // crosses regions, storing NULL? 2286 2287 cbz(new_val, done); 2288 2289 // storing region crossing non-NULL, is card already dirty? 2290 const Register card_addr = tmp1; 2291 assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); 2292 2293 mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference); 2294 add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift)); 2295 2296 ldrb(tmp2, Address(card_addr)); 2297 cmp(tmp2, (int)G1CardTable::g1_young_card_val()); 2298 b(done, eq); 2299 2300 membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); 2301 2302 assert(CardTable::dirty_card_val() == 0, "adjust this code"); 2303 ldrb(tmp2, Address(card_addr)); 2304 cbz(tmp2, done); 2305 2306 // storing a region crossing, non-NULL oop, card is clean. 2307 // dirty card and log. 2308 2309 strb(zero_register(tmp2), Address(card_addr)); 2310 2311 ldr(tmp2, queue_index); 2312 ldr(tmp3, buffer); 2313 2314 subs(tmp2, tmp2, wordSize); 2315 b(runtime, lt); // go to runtime if now negative 2316 2317 str(tmp2, queue_index); 2318 2319 str(card_addr, Address(tmp3, tmp2)); 2320 b(done); 2321 2322 bind(runtime); 2323 2324 if (card_addr != R0) { 2325 mov(R0, card_addr); 2326 } 2327 mov(R1, Rthread); 2328 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1); 2329 2330 bind(done); 2331 } 2332 2333 #endif // INCLUDE_ALL_GCS 2334 2335 ////////////////////////////////////////////////////////////////////////////////// 2336 2337 #ifdef AARCH64 2338 2339 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { 2340 switch (size_in_bytes) { 2341 case 8: ldr(dst, src); break; 2342 case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break; 2343 case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break; 2344 case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break; 2345 default: ShouldNotReachHere(); 2346 } 2347 } 2348 2349 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { 2350 switch (size_in_bytes) { 2351 case 8: str(src, dst); break; 2352 case 4: str_32(src, dst); break; 2353 case 2: strh(src, dst); break; 2354 case 1: strb(src, dst); break; 2355 default: ShouldNotReachHere(); 2356 } 2357 } 2358 2359 #else 2360 2361 void MacroAssembler::load_sized_value(Register dst, Address src, 2362 size_t size_in_bytes, bool is_signed, AsmCondition cond) { 2363 switch (size_in_bytes) { 2364 case 4: ldr(dst, src, cond); break; 2365 case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break; 2366 case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break; 2367 default: ShouldNotReachHere(); 2368 } 2369 } 2370 2371 2372 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) { 2373 switch (size_in_bytes) { 2374 case 4: str(src, dst, cond); break; 2375 case 2: strh(src, dst, cond); break; 2376 case 1: strb(src, dst, cond); break; 2377 default: ShouldNotReachHere(); 2378 } 2379 } 2380 #endif // AARCH64 2381 2382 // Look up the method for a megamorphic invokeinterface call. 2383 // The target method is determined by <Rinterf, Rindex>. 2384 // The receiver klass is in Rklass. 
2385 // On success, the result will be in method_result, and execution falls through. 2386 // On failure, execution transfers to the given label. 2387 void MacroAssembler::lookup_interface_method(Register Rklass, 2388 Register Rintf, 2389 RegisterOrConstant itable_index, 2390 Register method_result, 2391 Register Rscan, 2392 Register Rtmp, 2393 Label& L_no_such_interface) { 2394 2395 assert_different_registers(Rklass, Rintf, Rscan, Rtmp); 2396 2397 const int entry_size = itableOffsetEntry::size() * HeapWordSize; 2398 assert(itableOffsetEntry::interface_offset_in_bytes() == 0, "not added for convenience"); 2399 2400 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 2401 const int base = in_bytes(Klass::vtable_start_offset()); 2402 const int scale = exact_log2(vtableEntry::size_in_bytes()); 2403 ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable 2404 add(Rscan, Rklass, base); 2405 add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale)); 2406 2407 // Search through the itable for an interface equal to incoming Rintf 2408 // itable looks like [intface][offset][intface][offset][intface][offset] 2409 2410 Label loop; 2411 bind(loop); 2412 ldr(Rtmp, Address(Rscan, entry_size, post_indexed)); 2413 #ifdef AARCH64 2414 Label found; 2415 cmp(Rtmp, Rintf); 2416 b(found, eq); 2417 cbnz(Rtmp, loop); 2418 #else 2419 cmp(Rtmp, Rintf); // set ZF and CF if interface is found 2420 cmn(Rtmp, 0, ne); // check if tmp == 0 and clear CF if it is 2421 b(loop, ne); 2422 #endif // AARCH64 2423 2424 #ifdef AARCH64 2425 b(L_no_such_interface); 2426 bind(found); 2427 #else 2428 // CF == 0 means we reached the end of itable without finding icklass 2429 b(L_no_such_interface, cc); 2430 #endif // !AARCH64 2431 2432 if (method_result != noreg) { 2433 // Interface found at previous position of Rscan, now load the method 2434 ldr_s32(Rtmp, Address(Rscan, itableOffsetEntry::offset_offset_in_bytes() - entry_size)); 2435 if (itable_index.is_register()) { 2436 add(Rtmp, Rtmp, Rklass); // Add offset to Klass* 2437 assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below"); 2438 assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below"); 2439 ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register())); 2440 } else { 2441 int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() + 2442 itableMethodEntry::method_offset_in_bytes(); 2443 add_slow(method_result, Rklass, method_offset); 2444 ldr(method_result, Address(method_result, Rtmp)); 2445 } 2446 } 2447 } 2448 2449 #ifdef COMPILER2 2450 // TODO: 8 bytes at a time? pre-fetch? 2451 // Compare char[] arrays aligned to 4 bytes. 2452 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, 2453 Register limit, Register result, 2454 Register chr1, Register chr2, Label& Ldone) { 2455 Label Lvector, Lloop; 2456 2457 // Note: limit contains number of bytes (2*char_elements) != 0. 2458 tst(limit, 0x2); // trailing character ? 2459 b(Lvector, eq); 2460 2461 // compare the trailing char 2462 sub(limit, limit, sizeof(jchar)); 2463 ldrh(chr1, Address(ary1, limit)); 2464 ldrh(chr2, Address(ary2, limit)); 2465 cmp(chr1, chr2); 2466 mov(result, 0, ne); // not equal 2467 b(Ldone, ne); 2468 2469 // only one char ? 
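  // (limit was already decremented by sizeof(jchar) above, so a value of zero
  //  here means the single trailing char compared above was the whole array.)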
2470 tst(limit, limit); 2471 mov(result, 1, eq); 2472 b(Ldone, eq); 2473 2474 // word by word compare, dont't need alignment check 2475 bind(Lvector); 2476 2477 // Shift ary1 and ary2 to the end of the arrays, negate limit 2478 add(ary1, limit, ary1); 2479 add(ary2, limit, ary2); 2480 neg(limit, limit); 2481 2482 bind(Lloop); 2483 ldr_u32(chr1, Address(ary1, limit)); 2484 ldr_u32(chr2, Address(ary2, limit)); 2485 cmp_32(chr1, chr2); 2486 mov(result, 0, ne); // not equal 2487 b(Ldone, ne); 2488 adds(limit, limit, 2*sizeof(jchar)); 2489 b(Lloop, ne); 2490 2491 // Caller should set it: 2492 // mov(result_reg, 1); //equal 2493 } 2494 #endif 2495 2496 void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) { 2497 mov_slow(tmpreg1, counter_addr); 2498 ldr_s32(tmpreg2, tmpreg1); 2499 add_32(tmpreg2, tmpreg2, 1); 2500 str_32(tmpreg2, tmpreg1); 2501 } 2502 2503 void MacroAssembler::floating_cmp(Register dst) { 2504 #ifdef AARCH64 2505 NOT_TESTED(); 2506 cset(dst, gt); // 1 if '>', else 0 2507 csinv(dst, dst, ZR, ge); // previous value if '>=', else -1 2508 #else 2509 vmrs(dst, FPSCR); 2510 orr(dst, dst, 0x08000000); 2511 eor(dst, dst, AsmOperand(dst, lsl, 3)); 2512 mov(dst, AsmOperand(dst, asr, 30)); 2513 #endif 2514 } 2515 2516 void MacroAssembler::restore_default_fp_mode() { 2517 #ifdef AARCH64 2518 msr(SysReg_FPCR, ZR); 2519 #else 2520 #ifndef __SOFTFP__ 2521 // Round to Near mode, IEEE compatible, masked exceptions 2522 mov(Rtemp, 0); 2523 vmsr(FPSCR, Rtemp); 2524 #endif // !__SOFTFP__ 2525 #endif // AARCH64 2526 } 2527 2528 #ifndef AARCH64 2529 // 24-bit word range == 26-bit byte range 2530 bool check26(int offset) { 2531 // this could be simplified, but it mimics encoding and decoding 2532 // an actual branch insrtuction 2533 int off1 = offset << 6 >> 8; 2534 int encoded = off1 & ((1<<24)-1); 2535 int decoded = encoded << 8 >> 6; 2536 return offset == decoded; 2537 } 2538 #endif // !AARCH64 2539 2540 // Perform some slight adjustments so the default 32MB code cache 2541 // is fully reachable. 2542 static inline address first_cache_address() { 2543 return CodeCache::low_bound() + sizeof(HeapBlock::Header); 2544 } 2545 static inline address last_cache_address() { 2546 return CodeCache::high_bound() - Assembler::InstructionSize; 2547 } 2548 2549 #ifdef AARCH64 2550 // Can we reach target using ADRP? 2551 bool MacroAssembler::page_reachable_from_cache(address target) { 2552 intptr_t cl = (intptr_t)first_cache_address() & ~0xfff; 2553 intptr_t ch = (intptr_t)last_cache_address() & ~0xfff; 2554 intptr_t addr = (intptr_t)target & ~0xfff; 2555 2556 intptr_t loffset = addr - cl; 2557 intptr_t hoffset = addr - ch; 2558 return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0); 2559 } 2560 #endif 2561 2562 // Can we reach target using unconditional branch or call from anywhere 2563 // in the code cache (because code can be relocated)? 2564 bool MacroAssembler::_reachable_from_cache(address target) { 2565 #ifdef __thumb__ 2566 if ((1 & (intptr_t)target) != 0) { 2567 // Return false to avoid 'b' if we need switching to THUMB mode. 2568 return false; 2569 } 2570 #endif 2571 2572 address cl = first_cache_address(); 2573 address ch = last_cache_address(); 2574 2575 if (ForceUnreachable) { 2576 // Only addresses from CodeCache can be treated as reachable. 
2577 if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) { 2578 return false; 2579 } 2580 } 2581 2582 intptr_t loffset = (intptr_t)target - (intptr_t)cl; 2583 intptr_t hoffset = (intptr_t)target - (intptr_t)ch; 2584 2585 #ifdef AARCH64 2586 return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26); 2587 #else 2588 return check26(loffset - 8) && check26(hoffset - 8); 2589 #endif 2590 } 2591 2592 bool MacroAssembler::reachable_from_cache(address target) { 2593 assert(CodeCache::contains(pc()), "not supported"); 2594 return _reachable_from_cache(target); 2595 } 2596 2597 // Can we reach the entire code cache from anywhere else in the code cache? 2598 bool MacroAssembler::_cache_fully_reachable() { 2599 address cl = first_cache_address(); 2600 address ch = last_cache_address(); 2601 return _reachable_from_cache(cl) && _reachable_from_cache(ch); 2602 } 2603 2604 bool MacroAssembler::cache_fully_reachable() { 2605 assert(CodeCache::contains(pc()), "not supported"); 2606 return _cache_fully_reachable(); 2607 } 2608 2609 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2610 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2611 if (reachable_from_cache(target)) { 2612 relocate(rtype); 2613 b(target NOT_AARCH64_ARG(cond)); 2614 return; 2615 } 2616 2617 // Note: relocate is not needed for the code below, 2618 // encoding targets in absolute format. 2619 if (ignore_non_patchable_relocations()) { 2620 rtype = relocInfo::none; 2621 } 2622 2623 #ifdef AARCH64 2624 assert (scratch != noreg, "should be specified"); 2625 InlinedAddress address_literal(target, rtype); 2626 ldr_literal(scratch, address_literal); 2627 br(scratch); 2628 int off = offset(); 2629 bind_literal(address_literal); 2630 #ifdef COMPILER2 2631 if (offset() - off == wordSize) { 2632 // no padding, so insert nop for worst-case sizing 2633 nop(); 2634 } 2635 #endif 2636 #else 2637 if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) { 2638 // Note: this version cannot be (atomically) patched 2639 mov_slow(scratch, (intptr_t)target, cond); 2640 bx(scratch, cond); 2641 } else { 2642 Label skip; 2643 InlinedAddress address_literal(target); 2644 if (cond != al) { 2645 b(skip, inverse(cond)); 2646 } 2647 relocate(rtype); 2648 ldr_literal(PC, address_literal); 2649 bind_literal(address_literal); 2650 bind(skip); 2651 } 2652 #endif // AARCH64 2653 } 2654 2655 // Similar to jump except that: 2656 // - near calls are valid only if any destination in the cache is near 2657 // - no movt/movw (not atomically patchable) 2658 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) { 2659 assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); 2660 if (cache_fully_reachable()) { 2661 // Note: this assumes that all possible targets (the initial one 2662 // and the addressed patched to) are all in the code cache. 2663 assert(CodeCache::contains(target), "target might be too far"); 2664 relocate(rtype); 2665 b(target NOT_AARCH64_ARG(cond)); 2666 return; 2667 } 2668 2669 // Discard the relocation information if not needed for CacheCompiledCode 2670 // since the next encodings are all in absolute format. 
2671 if (ignore_non_patchable_relocations()) { 2672 rtype = relocInfo::none; 2673 } 2674 2675 #ifdef AARCH64 2676 assert (scratch != noreg, "should be specified"); 2677 InlinedAddress address_literal(target); 2678 relocate(rtype); 2679 ldr_literal(scratch, address_literal); 2680 br(scratch); 2681 int off = offset(); 2682 bind_literal(address_literal); 2683 #ifdef COMPILER2 2684 if (offset() - off == wordSize) { 2685 // no padding, so insert nop for worst-case sizing 2686 nop(); 2687 } 2688 #endif 2689 #else 2690 { 2691 Label skip; 2692 InlinedAddress address_literal(target); 2693 if (cond != al) { 2694 b(skip, inverse(cond)); 2695 } 2696 relocate(rtype); 2697 ldr_literal(PC, address_literal); 2698 bind_literal(address_literal); 2699 bind(skip); 2700 } 2701 #endif // AARCH64 2702 } 2703 2704 void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) { 2705 Register scratch = LR; 2706 assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); 2707 if (reachable_from_cache(target)) { 2708 relocate(rspec); 2709 bl(target NOT_AARCH64_ARG(cond)); 2710 return; 2711 } 2712 2713 // Note: relocate is not needed for the code below, 2714 // encoding targets in absolute format. 2715 if (ignore_non_patchable_relocations()) { 2716 // This assumes the information was needed only for relocating the code. 2717 rspec = RelocationHolder::none; 2718 } 2719 2720 #ifndef AARCH64 2721 if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) { 2722 // Note: this version cannot be (atomically) patched 2723 mov_slow(scratch, (intptr_t)target, cond); 2724 blx(scratch, cond); 2725 return; 2726 } 2727 #endif 2728 2729 { 2730 Label ret_addr; 2731 #ifndef AARCH64 2732 if (cond != al) { 2733 b(ret_addr, inverse(cond)); 2734 } 2735 #endif 2736 2737 2738 #ifdef AARCH64 2739 // TODO-AARCH64: make more optimal implementation 2740 // [ Keep in sync with MacroAssembler::call_size ] 2741 assert(rspec.type() == relocInfo::none, "call reloc not implemented"); 2742 mov_slow(scratch, target); 2743 blr(scratch); 2744 #else 2745 InlinedAddress address_literal(target); 2746 relocate(rspec); 2747 adr(LR, ret_addr); 2748 ldr_literal(PC, address_literal); 2749 2750 bind_literal(address_literal); 2751 bind(ret_addr); 2752 #endif 2753 } 2754 } 2755 2756 #if defined(AARCH64) && defined(COMPILER2) 2757 int MacroAssembler::call_size(address target, bool far, bool patchable) { 2758 // FIXME: mov_slow is variable-length 2759 if (!far) return 1; // bl 2760 if (patchable) return 2; // ldr; blr 2761 return instr_count_for_mov_slow((intptr_t)target) + 1; 2762 } 2763 #endif 2764 2765 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) { 2766 assert(rspec.type() == relocInfo::static_call_type || 2767 rspec.type() == relocInfo::none || 2768 rspec.type() == relocInfo::opt_virtual_call_type, "not supported"); 2769 2770 // Always generate the relocation information, needed for patching 2771 relocate(rspec); // used by NativeCall::is_call_before() 2772 if (cache_fully_reachable()) { 2773 // Note: this assumes that all possible targets (the initial one 2774 // and the addresses patched to) are all in the code cache. 2775 assert(CodeCache::contains(target), "target might be too far"); 2776 bl(target); 2777 } else { 2778 #if defined(AARCH64) && defined(COMPILER2) 2779 if (c2) { 2780 // return address needs to match call_size(). 
2781 // no need to trash Rtemp 2782 int off = offset(); 2783 Label skip_literal; 2784 InlinedAddress address_literal(target); 2785 ldr_literal(LR, address_literal); 2786 blr(LR); 2787 int ret_addr_offset = offset(); 2788 assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()"); 2789 b(skip_literal); 2790 int off2 = offset(); 2791 bind_literal(address_literal); 2792 if (offset() - off2 == wordSize) { 2793 // no padding, so insert nop for worst-case sizing 2794 nop(); 2795 } 2796 bind(skip_literal); 2797 return ret_addr_offset; 2798 } 2799 #endif 2800 Label ret_addr; 2801 InlinedAddress address_literal(target); 2802 #ifdef AARCH64 2803 ldr_literal(Rtemp, address_literal); 2804 adr(LR, ret_addr); 2805 br(Rtemp); 2806 #else 2807 adr(LR, ret_addr); 2808 ldr_literal(PC, address_literal); 2809 #endif 2810 bind_literal(address_literal); 2811 bind(ret_addr); 2812 } 2813 return offset(); 2814 } 2815 2816 // ((OopHandle)result).resolve(); 2817 void MacroAssembler::resolve_oop_handle(Register result) { 2818 // OopHandle::resolve is an indirection. 2819 ldr(result, Address(result, 0)); 2820 } 2821 2822 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { 2823 const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 2824 ldr(tmp, Address(method, Method::const_offset())); 2825 ldr(tmp, Address(tmp, ConstMethod::constants_offset())); 2826 ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes())); 2827 ldr(mirror, Address(tmp, mirror_offset)); 2828 resolve_oop_handle(mirror); 2829 } 2830 2831 2832 /////////////////////////////////////////////////////////////////////////////// 2833 2834 // Compressed pointers 2835 2836 #ifdef AARCH64 2837 2838 void MacroAssembler::load_klass(Register dst_klass, Register src_oop) { 2839 if (UseCompressedClassPointers) { 2840 ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2841 decode_klass_not_null(dst_klass); 2842 } else { 2843 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes())); 2844 } 2845 } 2846 2847 #else 2848 2849 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) { 2850 ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond); 2851 } 2852 2853 #endif // AARCH64 2854 2855 // Blows src_klass. 2856 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) { 2857 #ifdef AARCH64 2858 if (UseCompressedClassPointers) { 2859 assert(src_klass != dst_oop, "not enough registers"); 2860 encode_klass_not_null(src_klass); 2861 str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2862 return; 2863 } 2864 #endif // AARCH64 2865 str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); 2866 } 2867 2868 #ifdef AARCH64 2869 2870 void MacroAssembler::store_klass_gap(Register dst) { 2871 if (UseCompressedClassPointers) { 2872 str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes())); 2873 } 2874 } 2875 2876 #endif // AARCH64 2877 2878 2879 void MacroAssembler::load_heap_oop(Register dst, Address src) { 2880 #ifdef AARCH64 2881 if (UseCompressedOops) { 2882 ldr_w(dst, src); 2883 decode_heap_oop(dst); 2884 return; 2885 } 2886 #endif // AARCH64 2887 ldr(dst, src); 2888 } 2889 2890 // Blows src and flags. 
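// (In the AArch64 compressed-oops path below, encode_heap_oop() rewrites src in
// place and clobbers the flags before the narrow store; the plain str() path
// leaves both untouched.)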
2891 void MacroAssembler::store_heap_oop(Register src, Address dst) { 2892 #ifdef AARCH64 2893 if (UseCompressedOops) { 2894 assert(!dst.uses(src), "not enough registers"); 2895 encode_heap_oop(src); 2896 str_w(src, dst); 2897 return; 2898 } 2899 #endif // AARCH64 2900 str(src, dst); 2901 } 2902 2903 void MacroAssembler::store_heap_oop_null(Register src, Address dst) { 2904 #ifdef AARCH64 2905 if (UseCompressedOops) { 2906 str_w(src, dst); 2907 return; 2908 } 2909 #endif // AARCH64 2910 str(src, dst); 2911 } 2912 2913 2914 #ifdef AARCH64 2915 2916 // Algorithm must match oop.inline.hpp encode_heap_oop. 2917 void MacroAssembler::encode_heap_oop(Register dst, Register src) { 2918 // This code pattern is matched in NativeIntruction::skip_encode_heap_oop. 2919 // Update it at modifications. 2920 assert (UseCompressedOops, "must be compressed"); 2921 assert (Universe::heap() != NULL, "java heap should be initialized"); 2922 #ifdef ASSERT 2923 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2924 #endif 2925 verify_oop(src); 2926 if (Universe::narrow_oop_base() == NULL) { 2927 if (Universe::narrow_oop_shift() != 0) { 2928 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2929 _lsr(dst, src, Universe::narrow_oop_shift()); 2930 } else if (dst != src) { 2931 mov(dst, src); 2932 } 2933 } else { 2934 tst(src, src); 2935 csel(dst, Rheap_base, src, eq); 2936 sub(dst, dst, Rheap_base); 2937 if (Universe::narrow_oop_shift() != 0) { 2938 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2939 _lsr(dst, dst, Universe::narrow_oop_shift()); 2940 } 2941 } 2942 } 2943 2944 // Same algorithm as oop.inline.hpp decode_heap_oop. 2945 void MacroAssembler::decode_heap_oop(Register dst, Register src) { 2946 #ifdef ASSERT 2947 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2948 #endif 2949 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2950 if (Universe::narrow_oop_base() != NULL) { 2951 tst(src, src); 2952 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2953 csel(dst, dst, ZR, ne); 2954 } else { 2955 _lsl(dst, src, Universe::narrow_oop_shift()); 2956 } 2957 verify_oop(dst); 2958 } 2959 2960 #ifdef COMPILER2 2961 // Algorithm must match oop.inline.hpp encode_heap_oop. 2962 // Must preserve condition codes, or C2 encodeHeapOop_not_null rule 2963 // must be changed. 2964 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2965 assert (UseCompressedOops, "must be compressed"); 2966 assert (Universe::heap() != NULL, "java heap should be initialized"); 2967 #ifdef ASSERT 2968 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 2969 #endif 2970 verify_oop(src); 2971 if (Universe::narrow_oop_base() == NULL) { 2972 if (Universe::narrow_oop_shift() != 0) { 2973 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2974 _lsr(dst, src, Universe::narrow_oop_shift()); 2975 } else if (dst != src) { 2976 mov(dst, src); 2977 } 2978 } else { 2979 sub(dst, src, Rheap_base); 2980 if (Universe::narrow_oop_shift() != 0) { 2981 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2982 _lsr(dst, dst, Universe::narrow_oop_shift()); 2983 } 2984 } 2985 } 2986 2987 // Same algorithm as oops.inline.hpp decode_heap_oop. 
2988 // Must preserve condition codes, or C2 decodeHeapOop_not_null rule 2989 // must be changed. 2990 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 2991 #ifdef ASSERT 2992 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 2993 #endif 2994 assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2995 if (Universe::narrow_oop_base() != NULL) { 2996 add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift())); 2997 } else { 2998 _lsl(dst, src, Universe::narrow_oop_shift()); 2999 } 3000 verify_oop(dst); 3001 } 3002 3003 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3004 assert(UseCompressedClassPointers, "should only be used for compressed header"); 3005 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3006 int klass_index = oop_recorder()->find_index(k); 3007 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 3008 3009 // Relocation with special format (see relocInfo_arm.hpp). 3010 relocate(rspec); 3011 narrowKlass encoded_k = Klass::encode_klass(k); 3012 movz(dst, encoded_k & 0xffff, 0); 3013 movk(dst, (encoded_k >> 16) & 0xffff, 16); 3014 } 3015 3016 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3017 assert(UseCompressedOops, "should only be used for compressed header"); 3018 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3019 int oop_index = oop_recorder()->find_index(obj); 3020 RelocationHolder rspec = oop_Relocation::spec(oop_index); 3021 3022 relocate(rspec); 3023 movz(dst, 0xffff, 0); 3024 movk(dst, 0xffff, 16); 3025 } 3026 3027 #endif // COMPILER2 3028 // Must preserve condition codes, or C2 encodeKlass_not_null rule 3029 // must be changed. 3030 void MacroAssembler::encode_klass_not_null(Register r) { 3031 if (Universe::narrow_klass_base() != NULL) { 3032 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 3033 assert(r != Rheap_base, "Encoding a klass in Rheap_base"); 3034 mov_slow(Rheap_base, Universe::narrow_klass_base()); 3035 sub(r, r, Rheap_base); 3036 } 3037 if (Universe::narrow_klass_shift() != 0) { 3038 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3039 _lsr(r, r, Universe::narrow_klass_shift()); 3040 } 3041 if (Universe::narrow_klass_base() != NULL) { 3042 reinit_heapbase(); 3043 } 3044 } 3045 3046 // Must preserve condition codes, or C2 encodeKlass_not_null rule 3047 // must be changed. 3048 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3049 if (dst == src) { 3050 encode_klass_not_null(src); 3051 return; 3052 } 3053 if (Universe::narrow_klass_base() != NULL) { 3054 mov_slow(dst, (int64_t)Universe::narrow_klass_base()); 3055 sub(dst, src, dst); 3056 if (Universe::narrow_klass_shift() != 0) { 3057 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3058 _lsr(dst, dst, Universe::narrow_klass_shift()); 3059 } 3060 } else { 3061 if (Universe::narrow_klass_shift() != 0) { 3062 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3063 _lsr(dst, src, Universe::narrow_klass_shift()); 3064 } else { 3065 mov(dst, src); 3066 } 3067 } 3068 } 3069 3070 // Function instr_count_for_decode_klass_not_null() counts the instructions 3071 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 3072 // when (Universe::heap() != NULL). 
Hence, if the instructions they 3073 // generate change, then this method needs to be updated. 3074 int MacroAssembler::instr_count_for_decode_klass_not_null() { 3075 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3076 assert(Universe::heap() != NULL, "java heap should be initialized"); 3077 if (Universe::narrow_klass_base() != NULL) { 3078 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 3079 1 + // add 3080 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 3081 } else { 3082 if (Universe::narrow_klass_shift() != 0) { 3083 return 1; 3084 } 3085 } 3086 return 0; 3087 } 3088 3089 // Must preserve condition codes, or C2 decodeKlass_not_null rule 3090 // must be changed. 3091 void MacroAssembler::decode_klass_not_null(Register r) { 3092 int off = offset(); 3093 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 3094 assert(Universe::heap() != NULL, "java heap should be initialized"); 3095 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 3096 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 3097 // Also do not verify_oop as this is called by verify_oop. 3098 if (Universe::narrow_klass_base() != NULL) { 3099 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 3100 mov_slow(Rheap_base, Universe::narrow_klass_base()); 3101 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); 3102 reinit_heapbase(); 3103 } else { 3104 if (Universe::narrow_klass_shift() != 0) { 3105 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3106 _lsl(r, r, Universe::narrow_klass_shift()); 3107 } 3108 } 3109 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); 3110 } 3111 3112 // Must preserve condition codes, or C2 decodeKlass_not_null rule 3113 // must be changed. 3114 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3115 if (src == dst) { 3116 decode_klass_not_null(src); 3117 return; 3118 } 3119 3120 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 3121 assert(Universe::heap() != NULL, "java heap should be initialized"); 3122 assert(src != Rheap_base, "Decoding a klass in Rheap_base"); 3123 assert(dst != Rheap_base, "Decoding a klass into Rheap_base"); 3124 // Also do not verify_oop as this is called by verify_oop. 3125 if (Universe::narrow_klass_base() != NULL) { 3126 mov_slow(dst, Universe::narrow_klass_base()); 3127 add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift())); 3128 } else { 3129 _lsl(dst, src, Universe::narrow_klass_shift()); 3130 } 3131 } 3132 3133 3134 void MacroAssembler::reinit_heapbase() { 3135 if (UseCompressedOops || UseCompressedClassPointers) { 3136 if (Universe::heap() != NULL) { 3137 mov_slow(Rheap_base, Universe::narrow_ptrs_base()); 3138 } else { 3139 ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr()); 3140 } 3141 } 3142 } 3143 3144 #ifdef ASSERT 3145 void MacroAssembler::verify_heapbase(const char* msg) { 3146 // This code pattern is matched in NativeIntruction::skip_verify_heapbase. 3147 // Update it at modifications. 
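  // The CheckCompressedOops block below simply compares Rheap_base against
  // Universe::narrow_ptrs_base() and stops with 'msg' on mismatch, taking care
  // to preserve Rtemp and the NZCV flags around the comparison.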
3148 assert (UseCompressedOops, "should be compressed"); 3149 assert (Universe::heap() != NULL, "java heap should be initialized"); 3150 if (CheckCompressedOops) { 3151 Label ok; 3152 str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 3153 raw_push(Rtemp, ZR); 3154 mrs(Rtemp, Assembler::SysReg_NZCV); 3155 str(Rtemp, Address(SP, 1 * wordSize)); 3156 mov_slow(Rtemp, Universe::narrow_ptrs_base()); 3157 cmp(Rheap_base, Rtemp); 3158 b(ok, eq); 3159 stop(msg); 3160 bind(ok); 3161 ldr(Rtemp, Address(SP, 1 * wordSize)); 3162 msr(Assembler::SysReg_NZCV, Rtemp); 3163 raw_pop(Rtemp, ZR); 3164 str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset()))); 3165 } 3166 } 3167 #endif // ASSERT 3168 3169 #endif // AARCH64 3170 3171 #ifdef COMPILER2 3172 void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3173 { 3174 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3175 3176 Register Rmark = Rscratch2; 3177 3178 assert(Roop != Rscratch, ""); 3179 assert(Roop != Rmark, ""); 3180 assert(Rbox != Rscratch, ""); 3181 assert(Rbox != Rmark, ""); 3182 3183 Label fast_lock, done; 3184 3185 if (UseBiasedLocking && !UseOptoBiasInlining) { 3186 Label failed; 3187 #ifdef AARCH64 3188 biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed); 3189 #else 3190 biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed); 3191 #endif 3192 bind(failed); 3193 } 3194 3195 ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); 3196 tst(Rmark, markOopDesc::unlocked_value); 3197 b(fast_lock, ne); 3198 3199 // Check for recursive lock 3200 // See comments in InterpreterMacroAssembler::lock_object for 3201 // explanations on the fast recursive locking check. 
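  // In short: a recursive stack lock leaves the mark word pointing into our own
  // stack, so the code below checks that the low two lock bits are clear and
  // that (mark - SP) is less than a page; if so, zero is stored into the
  // BasicLock's displaced header slot to record the recursive entry.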
3202 #ifdef AARCH64 3203 intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); 3204 Assembler::LogicalImmediate imm(mask, false); 3205 mov(Rscratch, SP); 3206 sub(Rscratch, Rmark, Rscratch); 3207 ands(Rscratch, Rscratch, imm); 3208 b(done, ne); // exit with failure 3209 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero 3210 b(done); 3211 3212 #else 3213 // -1- test low 2 bits 3214 movs(Rscratch, AsmOperand(Rmark, lsl, 30)); 3215 // -2- test (hdr - SP) if the low two bits are 0 3216 sub(Rscratch, Rmark, SP, eq); 3217 movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); 3218 // If still 'eq' then recursive locking OK 3219 str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero 3220 b(done); 3221 #endif 3222 3223 bind(fast_lock); 3224 str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3225 3226 bool allow_fallthrough_on_failure = true; 3227 bool one_shot = true; 3228 cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3229 3230 bind(done); 3231 3232 } 3233 3234 void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3)) 3235 { 3236 assert(VM_Version::supports_ldrex(), "unsupported, yet?"); 3237 3238 Register Rmark = Rscratch2; 3239 3240 assert(Roop != Rscratch, ""); 3241 assert(Roop != Rmark, ""); 3242 assert(Rbox != Rscratch, ""); 3243 assert(Rbox != Rmark, ""); 3244 3245 Label done; 3246 3247 if (UseBiasedLocking && !UseOptoBiasInlining) { 3248 biased_locking_exit(Roop, Rscratch, done); 3249 } 3250 3251 ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); 3252 // If hdr is NULL, we've got recursive locking and there's nothing more to do 3253 cmp(Rmark, 0); 3254 b(done, eq); 3255 3256 // Restore the object header 3257 bool allow_fallthrough_on_failure = true; 3258 bool one_shot = true; 3259 cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); 3260 3261 bind(done); 3262 3263 } 3264 #endif // COMPILER2