/*
 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "asm/macroAssembler.hpp" 29 #include "ci/ciEnv.hpp" 30 #include "code/nativeInst.hpp" 31 #include "compiler/disassembler.hpp" 32 #include "gc/shared/cardTableModRefBS.hpp" 33 #include "gc/shared/collectedHeap.inline.hpp" 34 #include "interpreter/interpreter.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "oops/klass.inline.hpp" 37 #include "prims/methodHandles.hpp" 38 #include "runtime/biasedLocking.hpp" 39 #include "runtime/interfaceSupport.hpp" 40 #include "runtime/objectMonitor.hpp" 41 #include "runtime/os.hpp" 42 #include "runtime/sharedRuntime.hpp" 43 #include "runtime/stubRoutines.hpp" 44 #include "utilities/macros.hpp" 45 #if INCLUDE_ALL_GCS 46 #include "gc/g1/g1CollectedHeap.inline.hpp" 47 #include "gc/g1/g1SATBCardTableModRefBS.hpp" 48 #include "gc/g1/heapRegion.hpp" 49 #endif 50 51 // Implementation of AddressLiteral 52 53 void AddressLiteral::set_rspec(relocInfo::relocType rtype) { 54 switch (rtype) { 55 case relocInfo::oop_type: 56 // Oops are a special case. Normally they would be their own section 57 // but in cases like icBuffer they are literals in the code stream that 58 // we don't have a section for. We use none so that we get a literal address 59 // which is always patchable. 
60 break; 61 case relocInfo::external_word_type: 62 _rspec = external_word_Relocation::spec(_target); 63 break; 64 case relocInfo::internal_word_type: 65 _rspec = internal_word_Relocation::spec(_target); 66 break; 67 case relocInfo::opt_virtual_call_type: 68 _rspec = opt_virtual_call_Relocation::spec(); 69 break; 70 case relocInfo::static_call_type: 71 _rspec = static_call_Relocation::spec(); 72 break; 73 case relocInfo::runtime_call_type: 74 _rspec = runtime_call_Relocation::spec(); 75 break; 76 case relocInfo::poll_type: 77 case relocInfo::poll_return_type: 78 _rspec = Relocation::spec_simple(rtype); 79 break; 80 case relocInfo::none: 81 break; 82 default: 83 ShouldNotReachHere(); 84 break; 85 } 86 } 87 88 // Initially added to the Assembler interface as a pure virtual: 89 // RegisterConstant delayed_value(..) 90 // for: 91 // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) 92 // this was subsequently modified to its present name and return type 93 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 94 Register tmp, 95 int offset) { 96 ShouldNotReachHere(); 97 return RegisterOrConstant(-1); 98 } 99 100 101 #ifdef AARCH64 102 // Note: ARM32 version is OS dependent 103 void MacroAssembler::breakpoint(AsmCondition cond) { 104 if (cond == al) { 105 brk(); 106 } else { 107 Label L; 108 b(L, inverse(cond)); 109 brk(); 110 bind(L); 111 } 112 } 113 #endif // AARCH64 114 115 116 // virtual method calling 117 void MacroAssembler::lookup_virtual_method(Register recv_klass, 118 Register vtable_index, 119 Register method_result) { 120 const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes(); 121 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 122 add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord)); 123 ldr(method_result, Address(recv_klass, base_offset)); 124 } 125 126 127 // Simplified, combined 
version, good for typical uses. 128 // Falls through on failure. 129 void MacroAssembler::check_klass_subtype(Register sub_klass, 130 Register super_klass, 131 Register temp_reg, 132 Register temp_reg2, 133 Register temp_reg3, 134 Label& L_success) { 135 Label L_failure; 136 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL); 137 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL); 138 bind(L_failure); 139 }; 140 141 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 142 Register super_klass, 143 Register temp_reg, 144 Register temp_reg2, 145 Label* L_success, 146 Label* L_failure, 147 Label* L_slow_path) { 148 149 assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg); 150 const Register super_check_offset = temp_reg2; 151 152 Label L_fallthrough; 153 int label_nulls = 0; 154 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 155 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 156 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 157 assert(label_nulls <= 1, "at most one NULL in the batch"); 158 159 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 160 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 161 Address super_check_offset_addr(super_klass, sco_offset); 162 163 // If the pointers are equal, we are done (e.g., String[] elements). 164 // This self-check enables sharing of secondary supertype arrays among 165 // non-primary types such as array-of-interface. Otherwise, each such 166 // type would need its own customized SSA. 167 // We move this check to the front of the fast path because many 168 // type checks are in fact trivially successful in this manner, 169 // so we get a nicely predicted branch right at the start of the check. 
170 cmp(sub_klass, super_klass); 171 b(*L_success, eq); 172 173 // Check the supertype display: 174 ldr_u32(super_check_offset, super_check_offset_addr); 175 176 Address super_check_addr(sub_klass, super_check_offset); 177 ldr(temp_reg, super_check_addr); 178 cmp(super_klass, temp_reg); // load displayed supertype 179 180 // This check has worked decisively for primary supers. 181 // Secondary supers are sought in the super_cache ('super_cache_addr'). 182 // (Secondary supers are interfaces and very deeply nested subtypes.) 183 // This works in the same check above because of a tricky aliasing 184 // between the super_cache and the primary super display elements. 185 // (The 'super_check_addr' can address either, as the case requires.) 186 // Note that the cache is updated below if it does not help us find 187 // what we need immediately. 188 // So if it was a primary super, we can just fail immediately. 189 // Otherwise, it's the slow path for us (no success at this point). 190 191 b(*L_success, eq); 192 cmp_32(super_check_offset, sc_offset); 193 if (L_failure == &L_fallthrough) { 194 b(*L_slow_path, eq); 195 } else { 196 b(*L_failure, ne); 197 if (L_slow_path != &L_fallthrough) { 198 b(*L_slow_path); 199 } 200 } 201 202 bind(L_fallthrough); 203 } 204 205 206 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 207 Register super_klass, 208 Register temp_reg, 209 Register temp2_reg, 210 Register temp3_reg, 211 Label* L_success, 212 Label* L_failure, 213 bool set_cond_codes) { 214 #ifdef AARCH64 215 NOT_IMPLEMENTED(); 216 #else 217 // Note: if used by code that expects a register to be 0 on success, 218 // this register must be temp_reg and set_cond_codes must be true 219 220 Register saved_reg = noreg; 221 222 // get additional tmp registers 223 if (temp3_reg == noreg) { 224 saved_reg = temp3_reg = LR; 225 push(saved_reg); 226 } 227 228 assert(temp2_reg != noreg, "need all the temporary registers"); 229 assert_different_registers(sub_klass, 
super_klass, temp_reg, temp2_reg, temp3_reg); 230 231 Register cmp_temp = temp_reg; 232 Register scan_temp = temp3_reg; 233 Register count_temp = temp2_reg; 234 235 Label L_fallthrough; 236 int label_nulls = 0; 237 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 238 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 239 assert(label_nulls <= 1, "at most one NULL in the batch"); 240 241 // a couple of useful fields in sub_klass: 242 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 243 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 244 Address secondary_supers_addr(sub_klass, ss_offset); 245 Address super_cache_addr( sub_klass, sc_offset); 246 247 #ifndef PRODUCT 248 inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp); 249 #endif 250 251 // We will consult the secondary-super array. 252 ldr(scan_temp, Address(sub_klass, ss_offset)); 253 254 assert(! UseCompressedOops, "search_key must be the compressed super_klass"); 255 // else search_key is the 256 Register search_key = super_klass; 257 258 // Load the array length. 259 ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes())); 260 add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes()); 261 262 add(count_temp, count_temp, 1); 263 264 Label L_loop, L_setnz_and_fail, L_fail; 265 266 // Top of search loop 267 bind(L_loop); 268 // Notes: 269 // scan_temp starts at the array elements 270 // count_temp is 1+size 271 subs(count_temp, count_temp, 1); 272 if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) { 273 // direct jump to L_failure if failed and no cleanup needed 274 b(*L_failure, eq); // not found and 275 } else { 276 b(L_fail, eq); // not found in the array 277 } 278 279 // Load next super to check 280 // In the array of super classes elements are pointer sized. 
281 int element_size = wordSize; 282 ldr(cmp_temp, Address(scan_temp, element_size, post_indexed)); 283 284 // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list 285 subs(cmp_temp, cmp_temp, search_key); 286 287 // A miss means we are NOT a subtype and need to keep looping 288 b(L_loop, ne); 289 290 // Falling out the bottom means we found a hit; we ARE a subtype 291 292 // Note: temp_reg/cmp_temp is already 0 and flag Z is set 293 294 // Success. Cache the super we found and proceed in triumph. 295 str(super_klass, Address(sub_klass, sc_offset)); 296 297 if (saved_reg != noreg) { 298 // Return success 299 pop(saved_reg); 300 } 301 302 b(*L_success); 303 304 bind(L_fail); 305 // Note1: check "b(*L_failure, eq)" above if adding extra instructions here 306 if (set_cond_codes) { 307 movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed 308 } 309 if (saved_reg != noreg) { 310 pop(saved_reg); 311 } 312 if (L_failure != &L_fallthrough) { 313 b(*L_failure); 314 } 315 316 bind(L_fallthrough); 317 #endif 318 } 319 320 // Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same. 
321 Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) { 322 assert_different_registers(params_base, params_count); 323 add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize)); 324 return Address(tmp, -Interpreter::stackElementSize); 325 } 326 327 328 void MacroAssembler::align(int modulus) { 329 while (offset() % modulus != 0) { 330 nop(); 331 } 332 } 333 334 int MacroAssembler::set_last_Java_frame(Register last_java_sp, 335 Register last_java_fp, 336 bool save_last_java_pc, 337 Register tmp) { 338 int pc_offset; 339 if (last_java_fp != noreg) { 340 // optional 341 str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset())); 342 _fp_saved = true; 343 } else { 344 _fp_saved = false; 345 } 346 if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM 347 #ifdef AARCH64 348 pc_offset = mov_pc_to(tmp); 349 str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset())); 350 #else 351 str(PC, Address(Rthread, JavaThread::last_Java_pc_offset())); 352 pc_offset = offset() + VM_Version::stored_pc_adjustment(); 353 #endif 354 _pc_saved = true; 355 } else { 356 _pc_saved = false; 357 pc_offset = -1; 358 } 359 // According to comment in javaFrameAnchorm SP must be saved last, so that other 360 // entries are valid when SP is set. 361 362 // However, this is probably not a strong constrainst since for instance PC is 363 // sometimes read from the stack at SP... but is pushed later (by the call). Hence, 364 // we now write the fields in the expected order but we have not added a StoreStore 365 // barrier. 366 367 // XXX: if the ordering is really important, PC should always be saved (without forgetting 368 // to update oop_map offsets) and a StoreStore barrier might be needed. 
369 370 if (last_java_sp == noreg) { 371 last_java_sp = SP; // always saved 372 } 373 #ifdef AARCH64 374 if (last_java_sp == SP) { 375 mov(tmp, SP); 376 str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset())); 377 } else { 378 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 379 } 380 #else 381 str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset())); 382 #endif 383 384 return pc_offset; // for oopmaps 385 } 386 387 void MacroAssembler::reset_last_Java_frame(Register tmp) { 388 const Register Rzero = zero_register(tmp); 389 str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset())); 390 if (_fp_saved) { 391 str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset())); 392 } 393 if (_pc_saved) { 394 str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset())); 395 } 396 } 397 398 399 // Implementation of call_VM versions 400 401 void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) { 402 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 403 assert(number_of_arguments <= 4, "cannot have more than 4 arguments"); 404 405 #ifndef AARCH64 406 // Safer to save R9 here since callers may have been written 407 // assuming R9 survives. This is suboptimal but is not worth 408 // optimizing for the few platforms where R9 is scratched. 
409 push(RegisterSet(R4) | R9ifScratched); 410 mov(R4, SP); 411 bic(SP, SP, StackAlignmentInBytes - 1); 412 #endif // AARCH64 413 call(entry_point, relocInfo::runtime_call_type); 414 #ifndef AARCH64 415 mov(SP, R4); 416 pop(RegisterSet(R4) | R9ifScratched); 417 #endif // AARCH64 418 } 419 420 421 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 422 assert(number_of_arguments >= 0, "cannot have negative number of arguments"); 423 assert(number_of_arguments <= 3, "cannot have more than 3 arguments"); 424 425 const Register tmp = Rtemp; 426 assert_different_registers(oop_result, tmp); 427 428 set_last_Java_frame(SP, FP, true, tmp); 429 430 #ifdef ASSERT 431 AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); }); 432 #endif // ASSERT 433 434 #ifndef AARCH64 435 #if R9_IS_SCRATCHED 436 // Safer to save R9 here since callers may have been written 437 // assuming R9 survives. This is suboptimal but is not worth 438 // optimizing for the few platforms where R9 is scratched. 439 440 // Note: cannot save R9 above the saved SP (some calls expect for 441 // instance the Java stack top at the saved SP) 442 // => once saved (with set_last_Java_frame), decrease SP before rounding to 443 // ensure the slot at SP will be free for R9). 
444 sub(SP, SP, 4); 445 bic(SP, SP, StackAlignmentInBytes - 1); 446 str(R9, Address(SP, 0)); 447 #else 448 bic(SP, SP, StackAlignmentInBytes - 1); 449 #endif // R9_IS_SCRATCHED 450 #endif 451 452 mov(R0, Rthread); 453 call(entry_point, relocInfo::runtime_call_type); 454 455 #ifndef AARCH64 456 #if R9_IS_SCRATCHED 457 ldr(R9, Address(SP, 0)); 458 #endif 459 ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset())); 460 #endif 461 462 reset_last_Java_frame(tmp); 463 464 // C++ interp handles this in the interpreter 465 check_and_handle_popframe(); 466 check_and_handle_earlyret(); 467 468 if (check_exceptions) { 469 // check for pending exceptions 470 ldr(tmp, Address(Rthread, Thread::pending_exception_offset())); 471 #ifdef AARCH64 472 Label L; 473 cbz(tmp, L); 474 mov_pc_to(Rexception_pc); 475 b(StubRoutines::forward_exception_entry()); 476 bind(L); 477 #else 478 cmp(tmp, 0); 479 mov(Rexception_pc, PC, ne); 480 b(StubRoutines::forward_exception_entry(), ne); 481 #endif // AARCH64 482 } 483 484 // get oop result if there is one and reset the value in the thread 485 if (oop_result->is_valid()) { 486 get_vm_result(oop_result, tmp); 487 } 488 } 489 490 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { 491 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 492 } 493 494 495 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { 496 assert (arg_1 == R1, "fixed register for arg_1"); 497 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 498 } 499 500 501 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 502 assert (arg_1 == R1, "fixed register for arg_1"); 503 assert (arg_2 == R2, "fixed register for arg_2"); 504 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 505 } 506 507 508 void MacroAssembler::call_VM(Register oop_result, address entry_point, Register 
arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 509 assert (arg_1 == R1, "fixed register for arg_1"); 510 assert (arg_2 == R2, "fixed register for arg_2"); 511 assert (arg_3 == R3, "fixed register for arg_3"); 512 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 513 } 514 515 516 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { 517 // Not used on ARM 518 Unimplemented(); 519 } 520 521 522 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { 523 // Not used on ARM 524 Unimplemented(); 525 } 526 527 528 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { 529 // Not used on ARM 530 Unimplemented(); 531 } 532 533 534 void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { 535 // Not used on ARM 536 Unimplemented(); 537 } 538 539 // Raw call, without saving/restoring registers, exception handling, etc. 540 // Mainly used from various stubs. 541 void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) { 542 const Register tmp = Rtemp; // Rtemp free since scratched by call 543 set_last_Java_frame(SP, FP, true, tmp); 544 #if R9_IS_SCRATCHED 545 if (save_R9_if_scratched) { 546 // Note: Saving also R10 for alignment. 
547 push(RegisterSet(R9, R10)); 548 } 549 #endif 550 mov(R0, Rthread); 551 call(entry_point, relocInfo::runtime_call_type); 552 #if R9_IS_SCRATCHED 553 if (save_R9_if_scratched) { 554 pop(RegisterSet(R9, R10)); 555 } 556 #endif 557 reset_last_Java_frame(tmp); 558 } 559 560 void MacroAssembler::call_VM_leaf(address entry_point) { 561 call_VM_leaf_helper(entry_point, 0); 562 } 563 564 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { 565 assert (arg_1 == R0, "fixed register for arg_1"); 566 call_VM_leaf_helper(entry_point, 1); 567 } 568 569 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { 570 assert (arg_1 == R0, "fixed register for arg_1"); 571 assert (arg_2 == R1, "fixed register for arg_2"); 572 call_VM_leaf_helper(entry_point, 2); 573 } 574 575 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { 576 assert (arg_1 == R0, "fixed register for arg_1"); 577 assert (arg_2 == R1, "fixed register for arg_2"); 578 assert (arg_3 == R2, "fixed register for arg_3"); 579 call_VM_leaf_helper(entry_point, 3); 580 } 581 582 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) { 583 assert (arg_1 == R0, "fixed register for arg_1"); 584 assert (arg_2 == R1, "fixed register for arg_2"); 585 assert (arg_3 == R2, "fixed register for arg_3"); 586 assert (arg_4 == R3, "fixed register for arg_4"); 587 call_VM_leaf_helper(entry_point, 4); 588 } 589 590 void MacroAssembler::get_vm_result(Register oop_result, Register tmp) { 591 assert_different_registers(oop_result, tmp); 592 ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset())); 593 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset())); 594 verify_oop(oop_result); 595 } 596 597 void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) { 598 assert_different_registers(metadata_result, tmp); 599 
ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset())); 600 str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset())); 601 } 602 603 void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) { 604 if (arg2.is_register()) { 605 add(dst, arg1, arg2.as_register()); 606 } else { 607 add(dst, arg1, arg2.as_constant()); 608 } 609 } 610 611 void MacroAssembler::add_slow(Register rd, Register rn, int c) { 612 #ifdef AARCH64 613 if (c == 0) { 614 if (rd != rn) { 615 mov(rd, rn); 616 } 617 return; 618 } 619 if (c < 0) { 620 sub_slow(rd, rn, -c); 621 return; 622 } 623 if (c > right_n_bits(24)) { 624 guarantee(rd != rn, "no large add_slow with only one register"); 625 mov_slow(rd, c); 626 add(rd, rn, rd); 627 } else { 628 int lo = c & right_n_bits(12); 629 int hi = (c >> 12) & right_n_bits(12); 630 if (lo != 0) { 631 add(rd, rn, lo, lsl0); 632 } 633 if (hi != 0) { 634 add(rd, (lo == 0) ? rn : rd, hi, lsl12); 635 } 636 } 637 #else 638 // This function is used in compiler for handling large frame offsets 639 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 640 return sub(rd, rn, (-c)); 641 } 642 int low = c & 0x3fc; 643 if (low != 0) { 644 add(rd, rn, low); 645 rn = rd; 646 } 647 if (c & ~0x3fc) { 648 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c); 649 add(rd, rn, c & ~0x3fc); 650 } else if (rd != rn) { 651 assert(c == 0, ""); 652 mov(rd, rn); // need to generate at least one move! 653 } 654 #endif // AARCH64 655 } 656 657 void MacroAssembler::sub_slow(Register rd, Register rn, int c) { 658 #ifdef AARCH64 659 if (c <= 0) { 660 add_slow(rd, rn, -c); 661 return; 662 } 663 if (c > right_n_bits(24)) { 664 guarantee(rd != rn, "no large sub_slow with only one register"); 665 mov_slow(rd, c); 666 sub(rd, rn, rd); 667 } else { 668 int lo = c & right_n_bits(12); 669 int hi = (c >> 12) & right_n_bits(12); 670 if (lo != 0) { 671 sub(rd, rn, lo, lsl0); 672 } 673 if (hi != 0) { 674 sub(rd, (lo == 0) ? 
rn : rd, hi, lsl12); 675 } 676 } 677 #else 678 // This function is used in compiler for handling large frame offsets 679 if ((c < 0) && (((-c) & ~0x3fc) == 0)) { 680 return add(rd, rn, (-c)); 681 } 682 int low = c & 0x3fc; 683 if (low != 0) { 684 sub(rd, rn, low); 685 rn = rd; 686 } 687 if (c & ~0x3fc) { 688 assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c); 689 sub(rd, rn, c & ~0x3fc); 690 } else if (rd != rn) { 691 assert(c == 0, ""); 692 mov(rd, rn); // need to generate at least one move! 693 } 694 #endif // AARCH64 695 } 696 697 void MacroAssembler::mov_slow(Register rd, address addr) { 698 // do *not* call the non relocated mov_related_address 699 mov_slow(rd, (intptr_t)addr); 700 } 701 702 void MacroAssembler::mov_slow(Register rd, const char *str) { 703 mov_slow(rd, (intptr_t)str); 704 } 705 706 #ifdef AARCH64 707 708 // Common code for mov_slow and instr_count_for_mov_slow. 709 // Returns number of instructions of mov_slow pattern, 710 // generating it if non-null MacroAssembler is given. 711 int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) { 712 // This code pattern is matched in NativeIntruction::is_mov_slow. 713 // Update it at modifications. 
714 715 const intx mask = right_n_bits(16); 716 // 1 movz instruction 717 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 718 if ((c & ~(mask << base_shift)) == 0) { 719 if (masm != NULL) { 720 masm->movz(rd, ((uintx)c) >> base_shift, base_shift); 721 } 722 return 1; 723 } 724 } 725 // 1 movn instruction 726 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 727 if (((~c) & ~(mask << base_shift)) == 0) { 728 if (masm != NULL) { 729 masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift); 730 } 731 return 1; 732 } 733 } 734 // 1 orr instruction 735 { 736 LogicalImmediate imm(c, false); 737 if (imm.is_encoded()) { 738 if (masm != NULL) { 739 masm->orr(rd, ZR, imm); 740 } 741 return 1; 742 } 743 } 744 // 1 movz/movn + up to 3 movk instructions 745 int zeroes = 0; 746 int ones = 0; 747 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 748 int part = (c >> base_shift) & mask; 749 if (part == 0) { 750 ++zeroes; 751 } else if (part == mask) { 752 ++ones; 753 } 754 } 755 int def_bits = 0; 756 if (ones > zeroes) { 757 def_bits = mask; 758 } 759 int inst_count = 0; 760 for (int base_shift = 0; base_shift < 64; base_shift += 16) { 761 int part = (c >> base_shift) & mask; 762 if (part != def_bits) { 763 if (masm != NULL) { 764 if (inst_count > 0) { 765 masm->movk(rd, part, base_shift); 766 } else { 767 if (def_bits == 0) { 768 masm->movz(rd, part, base_shift); 769 } else { 770 masm->movn(rd, ~part & mask, base_shift); 771 } 772 } 773 } 774 inst_count++; 775 } 776 } 777 assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions"); 778 return inst_count; 779 } 780 781 void MacroAssembler::mov_slow(Register rd, intptr_t c) { 782 #ifdef ASSERT 783 int off = offset(); 784 #endif 785 (void) mov_slow_helper(rd, c, this); 786 assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch"); 787 } 788 789 // Counts instructions generated by mov_slow(rd, c). 
790 int MacroAssembler::instr_count_for_mov_slow(intptr_t c) { 791 return mov_slow_helper(noreg, c, NULL); 792 } 793 794 int MacroAssembler::instr_count_for_mov_slow(address c) { 795 return mov_slow_helper(noreg, (intptr_t)c, NULL); 796 } 797 798 #else 799 800 void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) { 801 if (AsmOperand::is_rotated_imm(c)) { 802 mov(rd, c, cond); 803 } else if (AsmOperand::is_rotated_imm(~c)) { 804 mvn(rd, ~c, cond); 805 } else if (VM_Version::supports_movw()) { 806 movw(rd, c & 0xffff, cond); 807 if ((unsigned int)c >> 16) { 808 movt(rd, (unsigned int)c >> 16, cond); 809 } 810 } else { 811 // Find first non-zero bit 812 int shift = 0; 813 while ((c & (3 << shift)) == 0) { 814 shift += 2; 815 } 816 // Put the least significant part of the constant 817 int mask = 0xff << shift; 818 mov(rd, c & mask, cond); 819 // Add up to 3 other parts of the constant; 820 // each of them can be represented as rotated_imm 821 if (c & (mask << 8)) { 822 orr(rd, rd, c & (mask << 8), cond); 823 } 824 if (c & (mask << 16)) { 825 orr(rd, rd, c & (mask << 16), cond); 826 } 827 if (c & (mask << 24)) { 828 orr(rd, rd, c & (mask << 24), cond); 829 } 830 } 831 } 832 833 #endif // AARCH64 834 835 void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index, 836 #ifdef AARCH64 837 bool patchable 838 #else 839 AsmCondition cond 840 #endif 841 ) { 842 843 if (o == NULL) { 844 #ifdef AARCH64 845 if (patchable) { 846 nop(); 847 } 848 mov(rd, ZR); 849 #else 850 mov(rd, 0, cond); 851 #endif 852 return; 853 } 854 855 if (oop_index == 0) { 856 oop_index = oop_recorder()->allocate_oop_index(o); 857 } 858 relocate(oop_Relocation::spec(oop_index)); 859 860 #ifdef AARCH64 861 if (patchable) { 862 nop(); 863 } 864 ldr(rd, pc()); 865 #else 866 if (VM_Version::supports_movw()) { 867 movw(rd, 0, cond); 868 movt(rd, 0, cond); 869 } else { 870 ldr(rd, Address(PC), cond); 871 // Extra nop to handle case of large offset of oop placeholder (see 
NativeMovConstReg::set_data). 872 nop(); 873 } 874 #endif 875 } 876 877 void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) { 878 if (o == NULL) { 879 #ifdef AARCH64 880 if (patchable) { 881 nop(); 882 } 883 #endif 884 mov(rd, 0); 885 return; 886 } 887 888 if (metadata_index == 0) { 889 metadata_index = oop_recorder()->allocate_metadata_index(o); 890 } 891 relocate(metadata_Relocation::spec(metadata_index)); 892 893 #ifdef AARCH64 894 if (patchable) { 895 nop(); 896 } 897 #ifdef COMPILER2 898 if (!patchable && VM_Version::prefer_moves_over_load_literal()) { 899 mov_slow(rd, (address)o); 900 return; 901 } 902 #endif 903 ldr(rd, pc()); 904 #else 905 if (VM_Version::supports_movw()) { 906 movw(rd, ((int)o) & 0xffff); 907 movt(rd, (unsigned int)o >> 16); 908 } else { 909 ldr(rd, Address(PC)); 910 // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data). 911 nop(); 912 } 913 #endif // AARCH64 914 } 915 916 void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) { 917 Label skip_constant; 918 union { 919 jfloat f; 920 jint i; 921 } accessor; 922 accessor.f = c; 923 924 #ifdef AARCH64 925 // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow 926 Label L; 927 ldr_s(fd, target(L)); 928 b(skip_constant); 929 bind(L); 930 emit_int32(accessor.i); 931 bind(skip_constant); 932 #else 933 flds(fd, Address(PC), cond); 934 b(skip_constant); 935 emit_int32(accessor.i); 936 bind(skip_constant); 937 #endif // AARCH64 938 } 939 940 void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) { 941 Label skip_constant; 942 union { 943 jdouble d; 944 jint i[2]; 945 } accessor; 946 accessor.d = c; 947 948 #ifdef AARCH64 949 // TODO-AARCH64 - try to optimize loading of double constants with fmov 950 Label L; 951 ldr_d(fd, target(L)); 952 b(skip_constant); 953 align(wordSize); 954 
// (continuation of MacroAssembler::mov_double: emit the 64-bit literal and fall through)
  bind(L);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#else
  // Load the double from a PC-relative literal emitted immediately after the branch.
  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
#endif // AARCH64
}

// Loads the signed 32-bit value stored at the absolute address `address_of_global`
// into `reg`. The base part of the address is materialized in `reg` first, so `reg`
// is clobbered before the load.
void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
#ifdef AARCH64
  assert((addr & 0x3) == 0, "address should be aligned");

  // FIXME: TODO
  if (false && page_reachable_from_cache(address_of_global)) {
    assert(false,"TODO: relocate");
    //relocate();
    adrp(reg, address_of_global);
    ldrsw(reg, Address(reg, addr & 0xfff));
  } else {
    // Split the address into a base (via mov_slow) and a 14-bit load offset.
    mov_slow(reg, addr & ~0x3fff);
    ldrsw(reg, Address(reg, addr & 0x3fff));
  }
#else
  // Split the address into a base (via mov_slow) and a 12-bit load offset.
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
#endif
}

// Loads the pointer-sized value stored at the absolute address into `reg`.
void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
#ifdef AARCH64
  intptr_t addr = (intptr_t) address_of_global;
  assert ((addr & 0x7) == 0, "address should be aligned");
  mov_slow(reg, addr & ~0x7fff);
  ldr(reg, Address(reg, addr & 0x7fff));
#else
  // On 32-bit ARM a pointer is 32 bits wide, so the s32 variant does the job.
  ldr_global_s32(reg, address_of_global);
#endif
}

// Loads the byte stored at the absolute address into `reg` (zero-extended).
void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

// Zero-extends the low `bits` bits of rn into rd.
// On AArch64 only 8/16/32 are supported; on 32-bit ARM any width works
// (mask for small widths, bit-clear for large, shift pair otherwise).
void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case 8: uxtb(rd, rn); break;
    case 16: uxth(rd, rn); break;
    case 32: mov_w(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    // Shift left then logical-shift right to clear the upper bits.
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
#endif
}

// Sign-extends the low `bits` bits of rn into rd.
// On AArch64 only 8/16/32 are supported; on 32-bit ARM a shift-left /
// arithmetic-shift-right pair handles any width.
void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
#ifdef AARCH64
  switch (bits) {
    case 8: sxtb(rd, rn); break;
    case 16: sxth(rd, rn); break;
    case 32: sxtw(rd, rn); break;
    default: ShouldNotReachHere();
  }
#else
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
#endif
}

#ifndef AARCH64

// Moves the 64-bit value rn_hi:rn_lo into rd_hi:rd_lo under `cond`,
// handling every register-overlap case. The full-overlap case
// (rd_lo == rn_hi && rd_hi == rn_lo) is resolved with a three-eor swap
// so no scratch register is needed.
void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

// 64-bit shift of rn_hi:rn_lo into rd_hi:rd_lo by a variable `count`.
// Picks a scratch register out of the destination pair, then uses the
// subs/rsb flags (pl = count >= 32, mi = count < 32) to select between
// the cross-word and in-word shift sequences.
void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);              // tmp = count - 32; sets pl/mi for the branches below
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);         // tmp = 32 - count (only when count < 32)
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

// 64-bit shift of rn_hi:rn_lo into rd_hi:rd_lo by a constant `count` (1..63).
// For count >= 32 the source crosses words entirely; otherwise each
// destination word combines bits from both source words.
void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        // Arithmetic shift: replicate the sign bit into the high word.
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}
#endif // !AARCH64

// Emits a runtime check (under +VerifyOops) that `reg` holds a valid oop:
// saves all registers, passes the oop, the register save area and a message
// string to the verify_oop stub, then restores everything.
void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);                              // oop to verify
  }
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

// Like _verify_oop, but the oop is loaded from memory at `addr`.
// If the address is SP-relative it is rebased to account for the registers
// pushed by save_all_registers before the load.
void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index()==noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);                               // oop to verify
  mov(R1, SP);                                 // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                       // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

// Emits an explicit null check (a load from the object address) when the
// implicit-check mechanism cannot cover `offset`. On 32-bit ARM a scratch
// register is required; Rtemp is used as a fallback where it is available.
void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
#ifdef AARCH64
    ldr(ZR, Address(reg));
#else
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == NULL)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
#endif
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
// Bump-allocates `size_expression` bytes from eden with an atomic CAS retry
// loop on the shared heap-top pointer. tmp1/tmp2 are scratched; branches to
// slow_case when inline allocation is unsupported, on address-arithmetic
// wrap-around, or when the allocation would exceed the heap end.
void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  CollectedHeap* ch = Universe::heap();

  const Register top_addr = tmp1;
  const Register heap_end = tmp2;

  if (size_expression.is_register()) {
    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
  } else {
    assert_different_registers(obj, obj_end, top_addr, heap_end);
  }

  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
  if (load_const) {
    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
  } else {
    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
  }
  // Calculate new heap_top by adding the size of the object
  Label retry;
  bind(retry);

#ifdef AARCH64
  ldxr(obj, top_addr);                 // load-exclusive of the current heap top
#else
  ldr(obj, Address(top_addr));
#endif // AARCH64

  // end_addr is loaded at a fixed delta from top_addr so a second address
  // materialization is not needed.
  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
  add_rc(obj_end, obj, size_expression);
  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
  cmp(obj_end, obj);
  b(slow_case, lo);
  // Update heap_top if allocation succeeded
  cmp(obj_end, heap_end);
  b(slow_case, hi);

#ifdef AARCH64
  stxr(heap_end/*scratched*/, obj_end, top_addr);
  cbnz_w(heap_end, retry);             // store-exclusive failed: another thread raced us
#else
  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
  b(retry, ne);
#endif // AARCH64
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
// Bump-allocates `size_expression` bytes from the current thread's TLAB.
// tmp1 is scratched; branches to slow_case when the TLAB cannot satisfy
// the request. No synchronization is needed since the TLAB is thread-local.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  const Register tlab_end = tmp1;
  assert_different_registers(obj, obj_end, tlab_end);

  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
  add_rc(obj_end, obj, size_expression);
  cmp(obj_end, tlab_end);
  b(slow_case, hi);
  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
}

// Refills the current thread's TLAB from eden. If the old TLAB still has too
// much free space to discard, it is retained and control branches to try_eden
// (allocate directly in shared space) instead. A retired TLAB's unused tail is
// filled with a dummy int array so the heap stays parseable. Branches to
// slow_case when inline allocation is unsupported or eden is exhausted.
void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
                                 Register tmp3, Register tmp4,
                                 Label& try_eden, Label& slow_case) {
  if (!Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
    return;
  }

  InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
  Label discard_tlab, do_refill;
  ldr(top, Address(Rthread, JavaThread::tlab_top_offset()));
  ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
  ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));

  // Calculate amount of free space
  sub(tmp1, tmp1, top);
  // Retain tlab and allocate in shared space
  // if the amount of free space in tlab is too large to discard
  cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
  b(discard_tlab, ge);

  // Increment waste limit to prevent getting stuck on this slow path
  mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
  add(tmp2, tmp2, tmp3);
  str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
  if (TLABStats) {
    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
    add_32(tmp2, tmp2, 1);
    str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
  }
  b(try_eden);
  // The klass-address literal is emitted here, behind the unconditional branch.
  bind_literal(intArrayKlass_addr);

  bind(discard_tlab);
  if (TLABStats) {
    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
    ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
    add_32(tmp2, tmp2, 1);
    add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
    str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
    str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
  }
  // If tlab is currently allocated (top or end != null)
  // then fill [top, end + alignment_reserve) with array object
  cbz(top, do_refill);

  // Set up the mark word
  mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
  // Set klass to intArrayKlass and the length to the remaining space
  ldr_literal(tmp2, intArrayKlass_addr);
  add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
      typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
  Register klass = tmp2;
  ldr(klass, Address(tmp2));
  logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
  str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
  store_klass(klass, top); // blows klass:
  klass = noreg;

  ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
  sub(tmp1, top, tmp1); // size of tlab's allocated portion
  incr_allocated_bytes(tmp1, tmp2);

  bind(do_refill);
  // Refill the tlab with an eden allocation
  ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
  logical_shift_left(tmp4, tmp1, LogHeapWordSize);
  eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
  str(top, Address(Rthread, JavaThread::tlab_start_offset()));
  str(top, Address(Rthread, JavaThread::tlab_top_offset()));

#ifdef ASSERT
  // Verify that tmp1 contains tlab_end
  ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
  add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
  cmp(tmp1, tmp2);
  breakpoint(ne);
#endif

  sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));

  if (ZeroTLAB) {
    // clobbers start and tmp
    // top must be preserved!
    add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
    ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
    zero_memory(tmp2, tmp1, tmp3);
  }
}

// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

#ifdef AARCH64
  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
  const Register size = tmp;
  Label remaining, done;

  sub(size, end, start);

#ifdef ASSERT
  { Label L;
    tst(size, wordSize - 1);
    b(L, eq);
    stop("size is not a multiple of wordSize");
    bind(L);
  }
#endif // ASSERT

  subs(size, size, wordSize);
  b(remaining, le);

  // Zero by 2 words per iteration.
  bind(loop);
  subs(size, size, 2*wordSize);
  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
  b(loop, gt);

  bind(remaining);
  b(done, ne);
  str(ZR, Address(ptr));   // odd word count: one trailing single-word store
  bind(done);
#else
  mov(tmp, 0);
  bind(loop);
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
#endif // AARCH64
}

// Bumps the per-thread allocated-bytes statistic by `size_in_bytes`.
// On 32-bit ARM the counter is 64 bits; the high word is only touched
// (via an atomic ldrd/strd pair) when the low-word add carries.
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
#ifdef AARCH64
  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  add_rc(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
#else
  // Bump total bytes allocated by this thread
  Label done;

  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(tmp, tmp, size_in_bytes);
  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
  b(done, cc);

  // Increment the high word and store single-copy atomically (that is an unlikely
  // scenario on typical embedded systems as it means >4GB has been allocated).
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
  // registers. Such a request could be difficult to satisfy by allocating those
  // registers on a higher level, therefore the routine is ready to allocate a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
    low = R2;
    high = R3;
  } else {
    low = R0;
    high = R1;
  }
  push(RegisterSet(low, high));

  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
  adds(low, low, size_in_bytes);
  adc(high, high, 0);
  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));

  pop(RegisterSet(low, high));

  bind(done);
#endif // AARCH64
}

// Stack-bangs every page of a fixed-size frame below the shadow zone so that
// a stack overflow is detected eagerly. `tmp` is scratched.
void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  if (UseStackBanging) {
    const int page_size = os::vm_page_size();

    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
    strb(R0, Address(tmp));
#ifdef AARCH64
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
      sub(tmp, tmp, page_size);
      strb(R0, Address(tmp));
    }
#else
    // 0xff0 is the largest page-size-aligned immediate usable in pre-indexed form.
    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
      strb(R0, Address(tmp, -0xff0, pre_indexed));
    }
#endif // AARCH64
  }
}

// Stack-bang variant for a variable frame size held in Rsize.
// Both Rsize and tmp are scratched.
void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  if (UseStackBanging) {
    Label loop;

    mov(tmp, SP);
    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
#ifdef AARCH64
    sub(tmp, tmp, Rsize);
    bind(loop);
    subs(Rsize, Rsize, os::vm_page_size());
    strb(ZR, Address(tmp, Rsize));
#else
    bind(loop);
    subs(Rsize, Rsize, 0xff0);
    strb(R0, Address(tmp, -0xff0, pre_indexed));
#endif // AARCH64
    b(loop, hi);
  }
}

// Emits code that halts execution with a message: saves all registers and
// transfers control to MacroAssembler::debug (which does not return).
void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg);                     // message
  mov(R1, SP);                               // register save area

#ifdef AARCH64
  ldr_literal(Rtemp, Ldebug);
  br(Rtemp);
#else
  ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
#endif // AARCH64

#if defined(COMPILER2) && defined(AARCH64)
  int off = offset();
#endif
  bind_literal(Lmsg);
  bind_literal(Ldebug);
#if defined(COMPILER2) && defined(AARCH64)
  if (offset() - off == 2 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
}

// Emits code that prints a warning message (via the VM's warning()) and
// continues execution. Only caller-saved registers are preserved.
void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

#ifdef AARCH64
  // TODO-AARCH64 - get rid of extra debug parameters
  mov(R1, LR);
  mov(R2, FP);
  add(R3, SP, push_size);
#endif

  ldr_literal(R0, Lmsg);                     // message
  ldr_literal(LR, Lwarn);                    // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


// Pushes every general-purpose register (plus a PC slot) onto the stack and
// returns the size of the save area in bytes. The layout is what
// MacroAssembler::debug expects as its `registers` argument.
int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
#ifdef AARCH64
  const Register tmp = Rtemp;
  raw_push(R30, ZR);
  for (int i = 28; i >= 0; i -= 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  mov_pc_to(tmp);
  str(tmp, Address(SP, 31*wordSize));                // store PC in the reserved top slot
  ldr(tmp, Address(SP, tmp->encoding()*wordSize));   // reload tmp's own saved value
  return 32*wordSize;
#else
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
#endif // AARCH64
}

// Pops the register save area produced by save_all_registers (the saved PC
// slot is discarded, not restored).
void MacroAssembler::restore_all_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 28; i += 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
  raw_pop(R30, ZR);
#else
  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
  add(SP, SP, wordSize);                       // discard saved PC
#endif // AARCH64
}

// Pushes the caller-saved registers and returns the size of the save area in bytes.
int MacroAssembler::save_caller_save_registers() {
#ifdef AARCH64
  for (int i = 0; i <= 16; i += 2) {
    raw_push(as_Register(i), as_Register(i+1));
  }
  raw_push(R18, LR);
  return 20*wordSize;
#else
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
#endif // AARCH64
}

// Pops the register save area produced by save_caller_save_registers.
void MacroAssembler::restore_caller_save_registers() {
#ifdef AARCH64
  raw_pop(R18, LR);
  for (int i = 16; i >= 0; i -= 2) {
    raw_pop(as_Register(i), as_Register(i+1));
  }
#else
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
#endif // AARCH64
}

// Runtime entry reached from MacroAssembler::stop: prints the message and,
// under +ShowMessageBoxOnError, dumps the saved register state from the
// save area laid out by save_all_registers. Does not return.
void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
#ifdef AARCH64
      // saved registers: R0-R30, PC
      const int nregs = 32;
#else
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
#endif // AARCH64

      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
      }

#ifdef AARCH64
      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
#endif // AARCH64

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

// Emits a stop() with an "unimplemented: <what>" message.
void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

// RAII helper that pads the code emitted within its scope with nops up to a
// fixed size (size_in_instrs instructions), when enabled.
FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

    // Pad with nops up to the declared fixed size.
    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}

#ifdef AARCH64

// Serializes memory.
// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
  if (!os::is_MP()) return;

  // TODO-AARCH64 investigate dsb vs dmb effects
  if (order_constraint == StoreStore) {
    dmb(DMB_st);
  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
    dmb(DMB_ld);
  } else {
    dmb(DMB_all);
  }
}

#else

// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {
  if (!os::is_MP()) return;

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative stores reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    // Never-taken branch creates a control dependency on the preceding load.
    b(not_taken, ne);
  }
}

#endif // AARCH64

// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

#ifdef AARCH64

  Register Rscratch = tmp;
  Register Roop = base;
  Register mark = oldval;
  Register Rbox = newval;
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  // Instead of StoreStore here, we use store-release-exclusive below

  bind(loop);

  ldaxr(tmp, base);  // acquire
  cmp(tmp, oldval);
  b(slow_case, ne);
  stlxr(tmp, newval, base); // release
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }

  // MemBarAcquireLock would normally go here, but
  // we already do ldaxr+stlxr above, which has
  // Sequential Consistency

#else
  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

// Counterpart of cas_for_lock_acquire for the unlock path; same
// allow_fallthrough_on_failure / one_shot contract.
void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
                                          Register base, Register tmp, Label &slow_case,
                                          bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  assert_different_registers(oldval,newval,base,tmp);

#ifdef AARCH64
  Label loop;

  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");

  bind(loop);
  ldxr(tmp, base);
  cmp(tmp, oldval);
  b(slow_case, ne);
  // MemBarReleaseLock barrier
  stlxr(tmp, newval, base);
  if (one_shot) {
    cmp_w(tmp, 0);
  } else {
    cbnz_w(tmp, loop);
    fallthrough_is_success = true;
  }
#else
  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
#endif
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != NULL) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

#ifdef AARCH64
    raw_push(R0, R1);
    raw_push(R2, ZR);

    ldr_literal(R0, counter_addr_literal);

    bind(retry);
    ldxr_w(R1, R0);
    add_w(R1, R1, 1);
    stxr_w(R2, R1, R0);
    cbnz_w(R2, retry);

    raw_pop(R2, ZR);
    raw_pop(R0, R1);
#else
    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);       // save flags so the caller's condition codes survive

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);  // restore flags

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
#endif // AARCH64

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT


// Building block for CAS cases of biased locking: makes CAS and records statistics.
// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
                                                   Register tmp, Label& slow_case, int* counter_addr) {

  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
#ifdef ASSERT
  breakpoint(ne); // Fallthrough only on success
#endif
#ifndef PRODUCT
  if (counter_addr != NULL) {
    // Record a successful CAS in the statistics counter.
    cond_atomic_inc32(al, counter_addr);
  }
#endif // !PRODUCT
}

// Attempts to acquire (or rebias/revoke) the bias of obj_reg toward the
// current thread. On success falls through to 'done'; on contention branches
// to 'slow_case'; if biasing does not apply, falls through to the CAS-based
// locking path at the end of this function.
// Returns the code offset at which the implicit null check of the mark
// load happens (for use by the signal-handler based null check mechanism).
int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Register tmp2,
                                         Label& done, Label& slow_case,
                                         BiasedLockingCounters* counters) {
  // obj_reg must be preserved (at least) if the bias locking fails
  // tmp_reg is a temporary register
  // swap_reg was used as a temporary but contained a value
  //   that was used afterwards in some call paths. Callers
  //   have been fixed so that swap_reg no longer needs to be
  //   saved.
  // Rtemp is no longer scratched

  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
  guarantee(swap_reg!=tmp_reg, "invariant");
  assert(tmp_reg != noreg, "must supply tmp_reg");

#ifndef PRODUCT
  if (PrintBiasedLockingStatistics && (counters == NULL)) {
    counters = BiasedLocking::counters();
  }
#endif

  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;

  // The null check applies to the mark loading, if we need to load it.
  // If the mark has already been loaded in swap_reg then it has already
  // been performed and the offset is irrelevant.
  int null_check_offset = offset();
  if (!swap_reg_contains_mark) {
    ldr(swap_reg, mark_addr);
  }

  // On MP platform loads could return 'stale' values in some cases.
  // That is acceptable since either CAS or slow case path is taken in the worst case.

  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);

  b(cas_label, ne); // not biased => ordinary CAS locking

  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_klass(tmp_reg, obj_reg);
  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  orr(tmp_reg, tmp_reg, Rthread);
  eor(tmp_reg, tmp_reg, swap_reg); // xor leaves the bits that differ from (prototype | thread)

#ifdef AARCH64
  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
#else
  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place)); // ignore the age bits in the comparison
#endif // AARCH64

#ifndef PRODUCT
  if (counters != NULL) {
    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
  }
#endif // !PRODUCT

  b(done, eq); // already biased to us with a valid epoch => locked

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  b(try_revoke_bias, ne);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
  b(try_rebias, ne);

  // tmp_reg has the age, epoch and pattern bits cleared
  // The remaining (owner) bits are (Thread ^ current_owner)

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.

  // Note that we know the owner is not ourself. Hence, success can
  // only happen when the owner bits is 0

#ifdef AARCH64
  // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
  // cleared bit in the middle (cms bit). So it is loaded with separate instruction.
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(swap_reg, swap_reg, tmp2);
#else
  // until the assembler can be made smarter, we need to make some assumptions about the values
  // so we can optimize this:
  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");

  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
#endif // AARCH64

  orr(tmp_reg, swap_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  b(done);

  bind(try_rebias);

  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Set them to Rthread.
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  orr(tmp_reg, tmp_reg, Rthread); // new mark

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
        (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);

  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.

  b(done);

  bind(try_revoke_bias);

  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.

  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)

  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)

  // owner bits 'random'. Clear them
#ifdef AARCH64
  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
  andr(tmp_reg, tmp_reg, tmp2);
#else
  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
#endif // AARCH64

  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
        (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.

  bind(cas_label);

  return null_check_offset;
}


// Branches to 'done' when obj_reg's mark word carries the biased-lock
// pattern (biased unlock is a no-op); otherwise falls through.
// Clobbers tmp_reg and the condition flags.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));

  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  b(done, eq);
}


// Resolves a jobject/jweak handle in 'value' to the oop it refers to,
// in place. NULL is passed through unchanged. For jweak handles a G1
// SATB pre-barrier is applied (when G1 is in use) so the referent is
// kept alive for concurrent marking.
void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, not_weak;
  cbz(value, done);             // Use NULL as-is.
  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
  tbz(value, 0, not_weak);      // Test for jweak tag.
  // Resolve jweak.
  ldr(value, Address(value, -JNIHandles::weak_tag_value)); // load through the handle, undoing the tag bit
  verify_oop(value);
#if INCLUDE_ALL_GCS
  if (UseG1GC) {
    g1_write_barrier_pre(noreg, // store_addr
                         noreg, // new_val
                         value, // pre_val
                         tmp1,  // tmp1
                         tmp2); // tmp2
  }
#endif // INCLUDE_ALL_GCS
  b(done);
  bind(not_weak);
  // Resolve (untagged) jobject.
  ldr(value, Address(value));
  verify_oop(value);
  bind(done);
}


//////////////////////////////////////////////////////////////////////////////////

#if INCLUDE_ALL_GCS

// G1 pre-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// If store_addr != noreg, then previous value is loaded from [store_addr];
// in such case store_addr and new_val registers are preserved;
// otherwise pre_val register is preserved.
// SATB pre-barrier: if concurrent marking is active, records the previous
// value of the field being overwritten in the thread-local SATB queue,
// calling into the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_pre(Register store_addr,
                                          Register new_val,
                                          Register pre_val,
                                          Register tmp1,
                                          Register tmp2) {
  Label done;
  Label runtime;

  if (store_addr != noreg) {
    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
  } else {
    assert (new_val == noreg, "should be");
    assert_different_registers(pre_val, tmp1, tmp2, noreg);
  }

  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        SATBMarkQueue::byte_offset_of_active()));
  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  SATBMarkQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                   SATBMarkQueue::byte_offset_of_buf()));

  // Is marking active?
  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
  ldrb(tmp1, in_progress);
  cbz(tmp1, done); // barrier is a no-op unless marking is in progress

  // Do we need to load the previous value?
  if (store_addr != noreg) {
    load_heap_oop(pre_val, Address(store_addr, 0));
  }

  // Is the previous value null?
  cbz(pre_val, done); // null previous values need not be recorded

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ldr(tmp1, index);  // tmp1 := *index_adr
  ldr(tmp2, buffer);

  subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
  b(runtime, lt);             // If negative, goto runtime

  str(tmp1, index);           // *index_adr := tmp1

  // Record the previous value
  str(pre_val, Address(tmp2, tmp1));
  b(done);

  bind(runtime);

  // Queue full: hand pre_val to the runtime. Save the live input values
  // across the leaf call.
#ifdef AARCH64
  if (store_addr != noreg) {
    raw_push(store_addr, new_val);
  } else {
    raw_push(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    // avoid raw_push to support any ordering of store_addr and new_val
    push(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    push(pre_val);
  }
#endif // AARCH64

  if (pre_val != R0) {
    mov(R0, pre_val);
  }
  mov(R1, Rthread);

  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);

#ifdef AARCH64
  if (store_addr != noreg) {
    raw_pop(store_addr, new_val);
  } else {
    raw_pop(pre_val, ZR);
  }
#else
  if (store_addr != noreg) {
    pop(RegisterSet(store_addr) | RegisterSet(new_val));
  } else {
    pop(pre_val);
  }
#endif // AARCH64

  bind(done);
}

// G1 post-barrier.
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
// G1 post-barrier: dirties the card covering store_addr and logs it in the
// thread-local dirty-card queue when the store crosses heap regions with a
// non-NULL value, calling into the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register tmp1,
                                           Register tmp2,
                                           Register tmp3) {

  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        DirtyCardQueue::byte_offset_of_index()));
  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                   DirtyCardQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  eor(tmp1, store_addr, new_val);
#ifdef AARCH64
  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
  cbz(tmp1, done);
#else
  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
  b(done, eq); // same region => no barrier needed
#endif

  // crosses regions, storing NULL?

  cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp1;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift));

  ldrb(tmp2, Address(card_addr));
  cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
  b(done, eq); // young cards never need dirtying

  // StoreLoad barrier before re-reading the card, so that the card value
  // read below is ordered after the reference store.
  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);

  assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
  ldrb(tmp2, Address(card_addr));
  cbz(tmp2, done); // already dirty

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  strb(zero_register(tmp2), Address(card_addr)); // dirty_card_val() == 0

  ldr(tmp2, queue_index);
  ldr(tmp3, buffer);

  subs(tmp2, tmp2, wordSize);
  b(runtime, lt); // go to runtime if now negative

  str(tmp2, queue_index);

  str(card_addr, Address(tmp3, tmp2));
  b(done);

  bind(runtime);

  if (card_addr != R0) {
    mov(R0, card_addr);
  }
  mov(R1, Rthread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);

  bind(done);
}

#endif // INCLUDE_ALL_GCS

//////////////////////////////////////////////////////////////////////////////////

#ifdef AARCH64

// Loads a 1/2/4/8-byte value from src into dst, sign- or zero-extending
// sub-word sizes according to is_signed.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: ldr(dst, src); break;
    case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
    case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
    case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
    default: ShouldNotReachHere();
  }
}

// Stores the low 1/2/4/8 bytes of src to dst.
void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: str(src, dst); break;
    case 4: str_32(src, dst); break;
    case 2: strh(src, dst); break;
    case 1: strb(src, dst); break;
    default: ShouldNotReachHere();
  }
}

#else

// 32-bit ARM variant: loads a 1/2/4-byte value from src into dst under
// condition 'cond', sign- or zero-extending sub-word sizes.
void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: ldr(dst, src, cond); break;
    case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
    case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
    default: ShouldNotReachHere();
  }
}


// 32-bit ARM variant: stores the low 1/2/4 bytes of src to dst under
// condition 'cond'.
void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
    case 4: str(src, dst, cond); break;
    case 2: strh(src, dst, cond); break;
    case 1: strb(src, dst, cond); break;
    default: ShouldNotReachHere();
  }
}
#endif // AARCH64

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rinterf, Rindex>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rinterf,
                                             Register Rindex,
                                             Register method_result,
                                             Register temp_reg1,
                                             Register temp_reg2,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);

  Register Ritable = temp_reg1;

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Ritable, Rklass, base);
  add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));

  Label entry, search;

  b(entry);

  bind(search);
  add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);

  bind(entry);

  // Check that the entry is non-null. A null entry means that the receiver
  // class doesn't implement the interface, and wasn't the same as the
  // receiver class checked when the interface was resolved.

  ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
  cbz(temp_reg2, L_no_such_interface);

  cmp(Rinterf, temp_reg2);
  b(search, ne);

  ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
  add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
  assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
  assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");

  ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
}

#ifdef COMPILER2
// TODO: 8 bytes at a time? pre-fetch?
// Compare char[] arrays aligned to 4 bytes.
// 'limit' holds the byte length (2 * number of chars, non-zero). On
// inequality sets result to 0 and branches to Ldone; on equality falls
// through with the caller expected to set result to 1 (see note at end).
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register limit, Register result,
                                        Register chr1, Register chr2, Label& Ldone) {
  Label Lvector, Lloop;

  // Note: limit contains number of bytes (2*char_elements) != 0.
  tst(limit, 0x2); // trailing character ?
  b(Lvector, eq);

  // compare the trailing char
  sub(limit, limit, sizeof(jchar));
  ldrh(chr1, Address(ary1, limit));
  ldrh(chr2, Address(ary2, limit));
  cmp(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);

  // only one char ?
  tst(limit, limit);
  mov(result, 1, eq);
  b(Ldone, eq);

  // word by word compare, don't need alignment check
  bind(Lvector);

  // Shift ary1 and ary2 to the end of the arrays, negate limit
  add(ary1, limit, ary1);
  add(ary2, limit, ary2);
  neg(limit, limit);

  bind(Lloop);
  ldr_u32(chr1, Address(ary1, limit));
  ldr_u32(chr2, Address(ary2, limit));
  cmp_32(chr1, chr2);
  mov(result, 0, ne);     // not equal
  b(Ldone, ne);
  adds(limit, limit, 2*sizeof(jchar));
  b(Lloop, ne);

  // Caller should set it:
  // mov(result_reg, 1); //equal
}
#endif

// Non-atomic increment of the 32-bit counter at counter_addr.
// Clobbers both temporary registers.
void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

// Materializes the result of a floating-point compare as an integer in dst:
// -1, 0 or 1 (based on the current FP condition flags).
void MacroAssembler::floating_cmp(Register dst) {
#ifdef AARCH64
  NOT_TESTED();
  cset(dst, gt);            // 1 if '>', else 0
  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
#else
  // Bit-twiddling on the FPSCR flags; preserves the unordered semantics of
  // the preceding FP compare.
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
#endif
}

// Resets the FP control register to the Java default mode.
void MacroAssembler::restore_default_fp_mode() {
#ifdef AARCH64
  msr(SysReg_FPCR, ZR);
#else
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
#endif // AARCH64
}

#ifndef AARCH64
// 24-bit word range == 26-bit byte range
// Returns true when 'offset' survives a round-trip through the 24-bit
// signed immediate field of an ARM branch instruction.
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
  int off1 = offset << 6 >> 8;
  int encoded = off1 & ((1<<24)-1);
  int decoded = encoded << 8 >> 6;
  return offset == decoded;
}
#endif // !AARCH64

// Perform some slight adjustments so the default 32MB code cache
// is fully reachable.
static inline address first_cache_address() {
  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
}
static inline address last_cache_address() {
  return CodeCache::high_bound() - Assembler::InstructionSize;
}

#ifdef AARCH64
// Can we reach target using ADRP?
bool MacroAssembler::page_reachable_from_cache(address target) {
  // Compare 4KB-page-aligned addresses; ADRP has a signed 21-bit page offset.
  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
  intptr_t addr = (intptr_t)target & ~0xfff;

  intptr_t loffset = addr - cl;
  intptr_t hoffset = addr - ch;
  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
}
#endif

// Can we reach target using unconditional branch or call from anywhere
// in the code cache (because code can be relocated)?
bool MacroAssembler::_reachable_from_cache(address target) {
#ifdef __thumb__
  if ((1 & (intptr_t)target) != 0) {
    // Return false to avoid 'b' if we need switching to THUMB mode.
    return false;
  }
#endif

  address cl = first_cache_address();
  address ch = last_cache_address();

  if (ForceUnreachable) {
    // Only addresses from CodeCache can be treated as reachable.
    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
      return false;
    }
  }

  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;

#ifdef AARCH64
  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
#else
  // The -8 accounts for the ARM branch offset being relative to PC+8.
  return check26(loffset - 8) && check26(hoffset - 8);
#endif
}

bool MacroAssembler::reachable_from_cache(address target) {
  assert(CodeCache::contains(pc()), "not supported");
  return _reachable_from_cache(target);
}

// Can we reach the entire code cache from anywhere else in the code cache?
bool MacroAssembler::_cache_fully_reachable() {
  address cl = first_cache_address();
  address ch = last_cache_address();
  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
}

bool MacroAssembler::cache_fully_reachable() {
  assert(CodeCache::contains(pc()), "not supported");
  return _cache_fully_reachable();
}

// Emits an unconditional (or, on 32-bit ARM, conditional) jump to 'target',
// using a direct branch when reachable and an absolute-address sequence
// otherwise. 'scratch' may be clobbered by the far-branch forms.
void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (reachable_from_cache(target)) {
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target, rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    bx(scratch, cond);
  } else {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

// Similar to jump except that:
// - near calls are valid only if any destination in the cache is near
// - no movt/movw (not atomically patchable)
void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    relocate(rtype);
    b(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Discard the relocation information if not needed for CacheCompiledCode
  // since the next encodings are all in absolute format.
  if (ignore_non_patchable_relocations()) {
    rtype = relocInfo::none;
  }

#ifdef AARCH64
  assert (scratch != noreg, "should be specified");
  InlinedAddress address_literal(target);
  relocate(rtype);
  ldr_literal(scratch, address_literal);
  br(scratch);
  int off = offset();
  bind_literal(address_literal);
#ifdef COMPILER2
  if (offset() - off == wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
#else
  {
    Label skip;
    InlinedAddress address_literal(target);
    if (cond != al) {
      b(skip, inverse(cond));
    }
    relocate(rtype);
    ldr_literal(PC, address_literal);
    bind_literal(address_literal);
    bind(skip);
  }
#endif // AARCH64
}

// Emits a call to 'target', using a direct 'bl' when reachable and an
// absolute-address sequence (clobbering LR as scratch) otherwise.
void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
  Register scratch = LR;
  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
  if (reachable_from_cache(target)) {
    relocate(rspec);
    bl(target NOT_AARCH64_ARG(cond));
    return;
  }

  // Note: relocate is not needed for the code below,
  // encoding targets in absolute format.
  if (ignore_non_patchable_relocations()) {
    // This assumes the information was needed only for relocating the code.
    rspec = RelocationHolder::none;
  }

#ifndef AARCH64
  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
    // Note: this version cannot be (atomically) patched
    mov_slow(scratch, (intptr_t)target, cond);
    blx(scratch, cond);
    return;
  }
#endif

  {
    Label ret_addr;
#ifndef AARCH64
    if (cond != al) {
      b(ret_addr, inverse(cond));
    }
#endif


#ifdef AARCH64
    // TODO-AARCH64: make more optimal implementation
    // [ Keep in sync with MacroAssembler::call_size ]
    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
    mov_slow(scratch, target);
    blr(scratch);
#else
    InlinedAddress address_literal(target);
    relocate(rspec);
    adr(LR, ret_addr); // manually set up the return address
    ldr_literal(PC, address_literal);

    bind_literal(address_literal);
    bind(ret_addr);
#endif
  }
}

#if defined(AARCH64) && defined(COMPILER2)
// Number of instructions emitted by MacroAssembler::call for the given
// target/kind. Kept in sync with the emission code above.
int MacroAssembler::call_size(address target, bool far, bool patchable) {
  // FIXME: mov_slow is variable-length
  if (!far) return 1; // bl
  if (patchable) return 2; // ldr; blr
  return instr_count_for_mov_slow((intptr_t)target) + 1;
}
#endif

// Emits a call that can later be patched to a different target.
// Returns the code offset of the instruction after the call (the
// return-address offset).
int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
  assert(rspec.type() == relocInfo::static_call_type ||
         rspec.type() == relocInfo::none ||
         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");

  // Always generate the relocation information, needed for patching
  relocate(rspec); // used by NativeCall::is_call_before()
  if (cache_fully_reachable()) {
    // Note: this assumes that all possible targets (the initial one
    // and the addresses patched to) are all in the code cache.
    assert(CodeCache::contains(target), "target might be too far");
    bl(target);
  } else {
#if defined(AARCH64) && defined(COMPILER2)
    if (c2) {
      // return address needs to match call_size().
      // no need to trash Rtemp
      int off = offset();
      Label skip_literal;
      InlinedAddress address_literal(target);
      ldr_literal(LR, address_literal);
      blr(LR);
      int ret_addr_offset = offset();
      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
      b(skip_literal);
      int off2 = offset();
      bind_literal(address_literal);
      if (offset() - off2 == wordSize) {
        // no padding, so insert nop for worst-case sizing
        nop();
      }
      bind(skip_literal);
      return ret_addr_offset;
    }
#endif
    Label ret_addr;
    InlinedAddress address_literal(target);
#ifdef AARCH64
    ldr_literal(Rtemp, address_literal);
    adr(LR, ret_addr);
    br(Rtemp);
#else
    adr(LR, ret_addr);
    ldr_literal(PC, address_literal);
#endif
    bind_literal(address_literal);
    bind(ret_addr);
  }
  return offset();
}

// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  ldr(result, Address(result, 0));
}

// Loads the java.lang.Class mirror of the class holding 'method' into
// 'mirror', via method -> ConstMethod -> ConstantPool -> pool holder Klass.
// Clobbers tmp.
void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ldr(tmp, Address(method, Method::const_offset()));
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
}


///////////////////////////////////////////////////////////////////////////////

// Compressed pointers

#ifdef AARCH64

// Loads the Klass* of src_oop into dst_klass, decoding the narrow klass
// when compressed class pointers are enabled.
void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst_klass);
  } else {
    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
  }
}

#else

// 32-bit ARM: no compressed class pointers — plain (conditional) load.
void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
}

#endif // AARCH64

// Blows src_klass.
// Stores 'src_klass' into the klass field of 'dst_oop'; on AArch64 with
// compressed class pointers the klass is encoded first (clobbering src_klass,
// per the "Blows src_klass" note above).
void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
#ifdef AARCH64
  if (UseCompressedClassPointers) {
    assert(src_klass != dst_oop, "not enough registers");
    encode_klass_not_null(src_klass);
    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
    return;
  }
#endif // AARCH64
  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
}

#ifdef AARCH64

// Zeroes the 32-bit gap that follows a narrow klass field in the object
// header (only exists when compressed class pointers are enabled).
void MacroAssembler::store_klass_gap(Register dst) {
  if (UseCompressedClassPointers) {
    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

#endif // AARCH64


// Loads a heap oop from 'src' into 'dst', decoding the narrow oop when
// compressed oops are enabled (AArch64 only).
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef AARCH64
  if (UseCompressedOops) {
    ldr_w(dst, src);
    decode_heap_oop(dst);
    return;
  }
#endif // AARCH64
  ldr(dst, src);
}

// Blows src and flags.
// Stores the oop in 'src' to 'dst', encoding it first when compressed oops
// are enabled (which is why src is clobbered).
void MacroAssembler::store_heap_oop(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}

// Stores a known-NULL oop: no encoding needed, just a (narrow) store of the
// zero value the caller placed in 'src'.
void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
#ifdef AARCH64
  if (UseCompressedOops) {
    str_w(src, dst);
    return;
  }
#endif // AARCH64
  str(src, dst);
}


#ifdef AARCH64

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
  // Update it at modifications.
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: encoding is at most a right shift.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    // Heap-based: map NULL to Rheap_base first so the subtraction below
    // yields 0 for a NULL oop, then subtract the base and shift.
    tst(src, src);
    csel(dst, Rheap_base, src, eq);
    sub(dst, dst, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    // A NULL narrow oop must decode to NULL: compute base + (src << shift),
    // then select ZR when src was zero.
    tst(src, src);
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
    csel(dst, dst, ZR, ne);
  } else {
    // Zero-based: decoding is just the (possibly zero) left shift.
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

#ifdef COMPILER2
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
// must be changed.
// Encodes a heap oop known to be non-NULL; unlike encode_heap_oop there is
// no NULL check (and no flag-clobbering csel), keeping condition codes intact.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "must be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(src);
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: at most a right shift.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_oop_shift());
    } else if (dst != src) {
      mov(dst, src);
    }
  } else {
    // Heap-based: subtract the base (src is known non-NULL), then shift.
    sub(dst, src, Rheap_base);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_oop_shift());
    }
  }
}

// Same algorithm as oop.inline.hpp decode_heap_oop.
// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
// must be changed.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  if (Universe::narrow_oop_base() != NULL) {
    // src is known non-NULL, so no NULL-preserving csel is needed (this also
    // leaves the condition codes untouched, as required by the C2 rule).
    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_oop_shift());
  }
  verify_oop(dst);
}

// Materializes the narrow (encoded) form of klass 'k' in 'dst' as a
// movz/movk pair covered by a metadata relocation so it can be patched.
void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert(UseCompressedClassPointers, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);

  // Relocation with special format (see relocInfo_arm.hpp).
  relocate(rspec);
  narrowKlass encoded_k = Klass::encode_klass(k);
  // Emit the 32-bit encoded klass in two 16-bit halves.
  movz(dst, encoded_k & 0xffff, 0);
  movk(dst, (encoded_k >> 16) & 0xffff, 16);
}

// Materializes a narrow oop for 'obj' in 'dst'. The 0xffff immediates are
// placeholders; the actual value is installed when the oop relocation is
// processed (the instruction pattern just has to reserve both halves).
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert(UseCompressedOops, "should only be used for compressed header");
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);

  relocate(rspec);
  movz(dst, 0xffff, 0);
  movk(dst, 0xffff, 16);
}

#endif // COMPILER2

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
// In-place encode: 'r' holds a Klass* on entry and its narrow encoding on
// exit. Rheap_base is borrowed as scratch and restored via reinit_heapbase().
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    sub(r, r, Rheap_base);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    _lsr(r, r, Universe::narrow_klass_shift());
  }
  if (Universe::narrow_klass_base() != NULL) {
    // Rheap_base was clobbered above; restore it.
    reinit_heapbase();
  }
}

// Must preserve condition codes, or C2 encodeKlass_not_null rule
// must be changed.
// Two-register encode: dst receives the narrow encoding of the Klass* in src.
// When dst != src, dst itself serves as scratch, so Rheap_base is untouched.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    // Fall back to the in-place variant (which uses Rheap_base as scratch).
    encode_klass_not_null(src);
    return;
  }
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
    sub(dst, src, dst);
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, dst, Universe::narrow_klass_shift());
    }
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsr(dst, src, Universe::narrow_klass_shift());
    } else {
      mov(dst, src);
    }
  }
}

// Function instr_count_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL). Hence, if the instructions they
// generate change, then this method needs to be updated.
3148 int MacroAssembler::instr_count_for_decode_klass_not_null() { 3149 assert(UseCompressedClassPointers, "only for compressed klass ptrs"); 3150 assert(Universe::heap() != NULL, "java heap should be initialized"); 3151 if (Universe::narrow_klass_base() != NULL) { 3152 return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow 3153 1 + // add 3154 instr_count_for_mov_slow(Universe::narrow_ptrs_base()); // reinit_heapbase() = mov_slow 3155 } else { 3156 if (Universe::narrow_klass_shift() != 0) { 3157 return 1; 3158 } 3159 } 3160 return 0; 3161 } 3162 3163 // Must preserve condition codes, or C2 decodeKlass_not_null rule 3164 // must be changed. 3165 void MacroAssembler::decode_klass_not_null(Register r) { 3166 int off = offset(); 3167 assert(UseCompressedClassPointers, "should only be used for compressed headers"); 3168 assert(Universe::heap() != NULL, "java heap should be initialized"); 3169 assert(r != Rheap_base, "Decoding a klass in Rheap_base"); 3170 // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions. 3171 // Also do not verify_oop as this is called by verify_oop. 3172 if (Universe::narrow_klass_base() != NULL) { 3173 // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base. 3174 mov_slow(Rheap_base, Universe::narrow_klass_base()); 3175 add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift())); 3176 reinit_heapbase(); 3177 } else { 3178 if (Universe::narrow_klass_shift() != 0) { 3179 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3180 _lsl(r, r, Universe::narrow_klass_shift()); 3181 } 3182 } 3183 assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null"); 3184 } 3185 3186 // Must preserve condition codes, or C2 decodeKlass_not_null rule 3187 // must be changed. 
// Two-register decode: dst receives the Klass* for the narrow klass in src.
// When dst != src, dst serves as scratch, so Rheap_base is untouched.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    // Fall back to the in-place variant (which uses Rheap_base as scratch).
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}


// (Re)loads the compressed-pointers base into Rheap_base. Before the heap
// exists the base is loaded indirectly through narrow_ptrs_base_addr().
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
// Debug-only check that Rheap_base still holds narrow_ptrs_base(); stops with
// 'msg' if not. NZCV is saved and restored around the comparison so callers'
// condition codes are preserved; Rtemp is saved/restored via raw_push/raw_pop.
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it at modifications.
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    // NOTE(review): a non-zero value is stored here and cleared again at the
    // end — presumably flagging the thread as inside an unsafe top-frame
    // section for the duration of the check; confirm against JavaThread.
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    // Restore flags and scratch register.
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
// C2 inline fast path for monitorenter on the object in Roop. Falls through
// to 'done' with flags indicating success/failure (see cas_for_lock_acquire).
// Clobbers Rscratch and Rscratch2 (used as Rmark).
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    // Try to take or keep a bias on the object; on failure fall through to
    // the stack-locking path below.
    Label failed;
#ifdef AARCH64
    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
#else
    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
#endif
    bind(failed);
  }

  // Load the mark word; if it is unlocked, go straight to the CAS attempt.
  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
#ifdef AARCH64
  // Recursive stack-lock check: if (mark - SP) masked with
  // ~(page_size - 4) is zero, the displaced header lives in the current
  // thread's stack, i.e. this is a recursive lock; store the zero into the
  // box to mark it as recursive.
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  b(done, ne); // exit with failure
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
  b(done);
#endif

  // Unlocked: save the displaced mark word in the box, then try to install a
  // pointer to the box in the object header with a CAS.
  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}

// C2 inline fast path for monitorexit on the object in Roop; the companion of
// fast_lock above. Clobbers Rscratch and Rscratch2 (used as Rmark).
void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}
#endif // COMPILER2