/*
 * Copyright (c) 2013, Red Hat Inc.
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates.
 * All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include <sys/types.h>

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "interpreter/interpreter.hpp"

#include "compiler/disassembler.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/sharedRuntime.hpp"

// #include "gc_interface/collectedHeap.inline.hpp"
// #include "interpreter/interpreter.hpp"
// #include "memory/cardTableModRefBS.hpp"
// #include "prims/methodHandles.hpp"
// #include "runtime/biasedLocking.hpp"
// #include "runtime/interfaceSupport.hpp"
// #include "runtime/objectMonitor.hpp"
// #include "runtime/os.hpp"
// #include "runtime/sharedRuntime.hpp"
// #include "runtime/stubRoutines.hpp"

#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
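// (Illustrative note, not from the original header: a branch or
// load-literal is a single patched instruction, an adrp pair is two,
// and a movz/movk/movk constant is three — see the cases below.)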
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  int instructions = 1;
  assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant");
  long offset = (target - branch) >> 2;
  unsigned insn = *(unsigned*)branch;
  if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
    // Load register (literal)
    Instruction_aarch64::spatch(branch, 23, 5, offset);
  } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
    // Unconditional branch (immediate)
    Instruction_aarch64::spatch(branch, 25, 0, offset);
  } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
    // Conditional branch (immediate)
    Instruction_aarch64::spatch(branch, 23, 5, offset);
  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
    // Compare & branch (immediate)
    Instruction_aarch64::spatch(branch, 23, 5, offset);
  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
    // Test & branch (immediate)
    Instruction_aarch64::spatch(branch, 18, 5, offset);
  } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
    // PC-rel. addressing
    offset = target-branch;
    int shift = Instruction_aarch64::extract(insn, 31, 31);
    if (shift) {
      u_int64_t dest = (u_int64_t)target;
      uint64_t pc_page = (uint64_t)branch >> 12;
      uint64_t adr_page = (uint64_t)target >> 12;
      unsigned offset_lo = dest & 0xfff;
      offset = adr_page - pc_page;

      // We handle 3 types of PC relative addressing
      //   1 - adrp    Rx, target_page
      //       ldr/str Ry, [Rx, #offset_in_page]
      //   2 - adrp    Rx, target_page
      //       add     Ry, Rx, #offset_in_page
      //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
      // In the first 2 cases we must check that Rx is the same in the adrp and the
      // subsequent ldr/str or add instruction. Otherwise we could accidentally end
      // up treating a type 3 relocation as a type 1 or 2 just because it happened
      // to be followed by a random unrelated ldr/str or add instruction.
      //
      // In the case of a type 3 relocation, we know that these are only generated
      // for the safepoint polling page, or for the card type byte map base, so we
      // assert as much, and of course that the offset is 0.
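      //
      // Illustrative type 1 sequence (registers hypothetical):
      //   adrp x3, <target_page>           ; x3 <- page containing target
      //   ldr  w0, [x3, #offset_in_page]   ; same Rx (x3) in both insns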
      //
      unsigned insn2 = ((unsigned*)branch)[1];
      if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
          Instruction_aarch64::extract(insn, 4, 0) ==
          Instruction_aarch64::extract(insn2, 9, 5)) {
        // Load/store register (unsigned immediate)
        unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
        Instruction_aarch64::patch(branch + sizeof (unsigned),
                                   21, 10, offset_lo >> size);
        guarantee(((dest >> size) << size) == dest, "misaligned target");
        instructions = 2;
      } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
                 Instruction_aarch64::extract(insn, 4, 0) ==
                 Instruction_aarch64::extract(insn2, 4, 0)) {
        // add (immediate)
        Instruction_aarch64::patch(branch + sizeof (unsigned),
                                   21, 10, offset_lo);
        instructions = 2;
      } else {
        assert((jbyte *)target ==
               ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
               target == StubRoutines::crc_table_addr() ||
               (address)target == os::get_polling_page(),
               "adrp must be polling page or byte map base");
        assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
      }
    }
    int offset_lo = offset & 3;
    offset >>= 2;
    Instruction_aarch64::spatch(branch, 23, 5, offset);
    Instruction_aarch64::patch(branch, 30, 29, offset_lo);
  } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
    u_int64_t dest = (u_int64_t)target;
    // Move wide constant
    assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
    assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
    Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
    Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
    Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
    assert(pd_call_destination(branch) == target, "should be");
    instructions = 3;
  } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
             Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
    // nothing to do
    assert(target == 0, "did not expect to relocate target for polling page load");
  } else {
    ShouldNotReachHere();
  }
  return instructions * NativeInstruction::instruction_size;
}

int MacroAssembler::patch_oop(address insn_addr, address o) {
  int instructions;
  unsigned insn = *(unsigned*)insn_addr;
  assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");

  // OOPs are either narrow (32 bits) or wide (48 bits). We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
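  //
  // (Illustrative narrow-OOP sequence; the register is hypothetical:
  //    movz x10, #(n >> 16), lsl #16      // upper 16 bits
  //    movk x10, #(n & 0xffff)            // lower 16 bits
  //  the code below rewrites the imm16 field, bits 20..5, of each.)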
  if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
    // Move narrow OOP
    narrowOop n = oopDesc::encode_heap_oop((oop)o);
    Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
    instructions = 2;
  } else {
    // Move wide OOP
    assert(nativeInstruction_at(insn_addr+8)->is_movk(), "wrong insns in patch");
    uintptr_t dest = (uintptr_t)o;
    Instruction_aarch64::patch(insn_addr, 20, 5, dest & 0xffff);
    Instruction_aarch64::patch(insn_addr+4, 20, 5, (dest >>= 16) & 0xffff);
    Instruction_aarch64::patch(insn_addr+8, 20, 5, (dest >>= 16) & 0xffff);
    instructions = 3;
  }
  return instructions * NativeInstruction::instruction_size;
}

address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
  long offset = 0;
  if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
    // Load register (literal)
    offset = Instruction_aarch64::sextract(insn, 23, 5);
    return address(((uint64_t)insn_addr + (offset << 2)));
  } else if (Instruction_aarch64::extract(insn, 30, 26) == 0b00101) {
    // Unconditional branch (immediate)
    offset = Instruction_aarch64::sextract(insn, 25, 0);
  } else if (Instruction_aarch64::extract(insn, 31, 25) == 0b0101010) {
    // Conditional branch (immediate)
    offset = Instruction_aarch64::sextract(insn, 23, 5);
  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011010) {
    // Compare & branch (immediate)
    offset = Instruction_aarch64::sextract(insn, 23, 5);
  } else if (Instruction_aarch64::extract(insn, 30, 25) == 0b011011) {
    // Test & branch (immediate)
    offset = Instruction_aarch64::sextract(insn, 18, 5);
  } else if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
    // PC-rel. addressing
    offset = Instruction_aarch64::extract(insn, 30, 29);
    offset |= Instruction_aarch64::sextract(insn, 23, 5) << 2;
    int shift = Instruction_aarch64::extract(insn, 31, 31) ? 12 : 0;
    if (shift) {
      offset <<= shift;
      uint64_t target_page = ((uint64_t)insn_addr) + offset;
      target_page &= ((uint64_t)-1) << shift;
      // Return the target address for the following sequences
      //   1 - adrp    Rx, target_page
      //       ldr/str Ry, [Rx, #offset_in_page]
      //   [ 2 - adrp    Rx, target_page         ]  Not handled
      //   [     add     Ry, Rx, #offset_in_page ]
      //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
      //
      // In the case of type 1 we check that the register is the same and
      // return the target_page + the offset within the page.
      //
      // Otherwise we assume it is a page aligned relocation and return
      // the target page only. The only cases in which this is generated
      // are the safepoint polling page and the card table byte map base,
      // so we assert as much.
      //
      // Note: Strangely, we do not handle 'type 2' relocation (adrp followed
      // by add) which is handled in pd_patch_instruction above.
      //
      unsigned insn2 = ((unsigned*)insn_addr)[1];
      if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
          Instruction_aarch64::extract(insn, 4, 0) ==
          Instruction_aarch64::extract(insn2, 9, 5)) {
        // Load/store register (unsigned immediate)
        unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
        unsigned int size = Instruction_aarch64::extract(insn2, 31, 30);
        return address(target_page + (byte_offset << size));
      } else {
        assert((jbyte *)target_page ==
               ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
               (address)target_page == os::get_polling_page(),
               "adrp must be polling page or byte map base");
        return (address)target_page;
      }
    } else {
      ShouldNotReachHere();
    }
  } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
    u_int32_t *insns = (u_int32_t *)insn_addr;
    // Move wide constant: movz, movk, movk.  See movptr().
    assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
    assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
    return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
                   + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
                   + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
  } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
             Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
    return 0;
  } else {
    ShouldNotReachHere();
  }
  return address(((uint64_t)insn_addr + (offset << 2)));
}

void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  dsb(Assembler::SY);
}


void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  str(zr, Address(rthread, JavaThread::last_Java_sp_offset()));
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    str(zr, Address(rthread, JavaThread::last_Java_fp_offset()));
  }

  if (clear_pc) {
    str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
  }
}

// Calls to C land
//
// When entering C land, the rfp & resp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc,
                                         Register scratch) {

  if (last_java_pc->is_valid()) {
    str(last_java_pc, Address(rthread,
                              JavaThread::frame_anchor_offset()
                              + JavaFrameAnchor::last_Java_pc_offset()));
  }

  // determine last_java_sp register
  if (last_java_sp == sp) {
    mov(scratch, sp);
    last_java_sp = scratch;
  } else if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  str(last_java_sp, Address(rthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    str(last_java_fp, Address(rthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc,
                                         Register scratch) {
  if (last_java_pc != NULL) {
    adr(scratch, last_java_pc);
  } else {
    // FIXME: This is almost never correct.  We should delete all
    // cases of set_last_Java_frame with last_java_pc=NULL and use the
    // correct return address instead.
    adr(scratch, pc());
  }

  str(scratch, Address(rthread,
                       JavaThread::frame_anchor_offset()
                       + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg, scratch);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register scratch) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    set_last_Java_frame(last_java_sp, last_java_fp, (address)NULL, scratch);
  }
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    tmp_reg = rscratch2;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ldr(swap_reg, mark_addr);
  }
  andr(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place);
  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
  br(Assembler::NE, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orr(tmp_reg, tmp_reg, rthread);
  eor(tmp_reg, swap_reg, tmp_reg);
  andr(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    Label around;
    cbnz(tmp_reg, around);
    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1);
    b(done);
    bind(around);
  } else {
    cbz(tmp_reg, done);
  }

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
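  // (For reference — the 64-bit biased mark word layout per markOop.hpp:
  //  [JavaThread*:54 | epoch:2 | unused:1 | age:4 | biased_lock:1 | lock:2],
  //  so biased_lock_mask_in_place below selects the low three bits.)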
  andr(rscratch1, tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cbnz(rscratch1, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  andr(rscratch1, tmp_reg, markOopDesc::epoch_mask_in_place);
  cbnz(rscratch1, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  {
    Label here;
    mov(rscratch1, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
    andr(swap_reg, swap_reg, rscratch1);
    orr(tmp_reg, swap_reg, rthread);
    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case);
    // If the biasing toward our thread failed, this means that
    // another thread succeeded in biasing it toward itself and we
    // need to revoke that bias. The revocation will occur in the
    // interpreter runtime in the slow case.
    bind(here);
    if (counters != NULL) {
      atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
                  tmp_reg, rscratch1);
    }
  }
  b(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  {
    Label here;
    load_prototype_header(tmp_reg, obj_reg);
    orr(tmp_reg, rthread, tmp_reg);
    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, slow_case);
    // If the biasing toward our thread failed, then another thread
    // succeeded in biasing it toward itself and we need to revoke that
    // bias. The revocation will occur in the runtime in the slow case.
    bind(here);
    if (counters != NULL) {
      atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
                  tmp_reg, rscratch1);
    }
  }
  b(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme.  Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  {
    Label here, nope;
    load_prototype_header(tmp_reg, obj_reg);
    cmpxchgptr(swap_reg, tmp_reg, obj_reg, rscratch1, here, &nope);
    bind(here);

    // Fall through to the normal CAS-based lock, because no matter what
    // the result of the above CAS, some thread must have succeeded in
    // removing the bias bit from the object's header.
    if (counters != NULL) {
      atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
                  rscratch1);
    }
    bind(nope);
  }

  bind(cas_label);

  return null_check_offset;
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andr(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmp(temp_reg, markOopDesc::biased_lock_pattern);
  br(Assembler::EQ, done);
}


// added to make this compile

REGISTER_DEFINITION(Register, noreg);

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rthread;
  }

  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(java_thread == rthread, "unexpected register");
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  mov(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != rfp, "can't use rfp");

  Label l;
  set_last_Java_frame(last_java_sp, rfp, l, rscratch1);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true, true);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ldr(rscratch1, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    cbz(rscratch1, ok);
    lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
    br(rscratch1);
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call(Address entry) {
  if (true // reachable(entry)
      ) {
    bl(entry);
  } else {
    lea(rscratch1, entry);
    blr(rscratch1);
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  // address const_ptr = long_constant((jlong)Universe::non_oop_word());
  // unsigned long offset;
  // ldr_constant(rscratch2, const_ptr);
  movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
  call(Address(entry, rh));
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, rthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}


void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ldr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  str(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ldr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  str(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}

// these are no-ops overridden by InterpreterMacroAssembler

void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }

void MacroAssembler::check_and_handle_popframe(Register java_thread) { }


RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  ldr(tmp, ExternalAddress((address) delayed_value_addr));

  if (offset != 0)
    add(tmp, tmp, offset);

  return RegisterOrConstant(tmp);
}


void MacroAssembler:: notify(int type) {
  if (type == bytecode_start) {
    // set_last_Java_frame(esp, rfp, (address)NULL);
    Assembler:: notify(type);
    // reset_last_Java_frame(true, false);
  }
  else
    Assembler:: notify(type);
}

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
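// (Added note: the itable is a list of itableOffsetEntry records,
// interface klass plus offset, terminated by a NULL interface pointer;
// the scan loop below walks that list.)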
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  ldrw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  lea(scan_temp, Address(recv_klass, scan_temp, Address::lsl(3)));
  add(scan_temp, scan_temp, vtable_base);
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
  lea(recv_klass, Address(recv_klass, itable_index, Address::lsl(3)));
  if (itentry_off)
    add(recv_klass, recv_klass, itentry_off);

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  for (int peel = 1; peel >= 0; peel--) {
    ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmp(intf_klass, method_result);

    if (peel) {
      br(Assembler::EQ, found_method);
    } else {
      br(Assembler::NE, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    cbz(method_result, L_no_such_interface);
    add(scan_temp, scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
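  // scan_temp points at the matching itableOffsetEntry: load its offset
  // field, then fetch the Method* at recv_klass (already adjusted above
  // by the scaled itable_index) plus that offset.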
  ldr(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  ldr(method_result, Address(recv_klass, scan_temp));
}

// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == 8,
         "adjust the scaling in the code below");
  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();

  if (vtable_index.is_register()) {
    lea(method_result, Address(recv_klass,
                               vtable_index.as_register(),
                               Address::lsl(LogBytesPerWord)));
    ldr(method_result, Address(method_result, vtable_offset_in_bytes));
  } else {
    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
    ldr(method_result, Address(recv_klass, vtable_offset_in_bytes));
  }
}

void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            b(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  br(Assembler::EQ, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
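    // (x86 wording retained; here ldrw is the zero-extending 32-bit load.)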
    ldrw(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset);
  ldr(rscratch1, super_check_addr);
  cmp(super_klass, rscratch1); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    br(Assembler::EQ, *L_success);
    cmp(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_slow_path);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      br(Assembler::EQ, *L_success);
    } else {
      br(Assembler::NE, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef final_jmp
}

// These two are taken from x86, but they look generally useful

// scans count pointer sized words at [addr] for occurrence of value,
// generic
void MacroAssembler::repne_scan(Register addr, Register value, Register count,
                                Register scratch) {
  Label Lloop, Lexit;
  cbz(count, Lexit);
  bind(Lloop);
  ldr(scratch, post(addr, wordSize));
  cmp(value, scratch);
  br(EQ, Lexit);
  sub(count, count, 1);
  cbnz(count, Lloop);
  bind(Lexit);
}

// scans count 4 byte words at [addr] for occurrence of value,
// generic
void MacroAssembler::repne_scanw(Register addr, Register value, Register count,
                                 Register scratch) {
  Label Lloop, Lexit;
  cbz(count, Lexit);
  bind(Lloop);
  ldrw(scratch, post(addr, wordSize));
  cmpw(value, scratch);
  br(EQ, Lexit);
  sub(count, count, 1);
  cbnz(count, Lloop);
  bind(Lexit);
}

void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  BLOCK_COMMENT("check_klass_subtype_slow_path");

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super)
  assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter)

  // Get super_klass value into r0 (even if it was in r5 or r2).
  RegSet pushed_registers;
  if (!IS_A_TEMP(r2))    pushed_registers += r2;
  if (!IS_A_TEMP(r5))    pushed_registers += r5;

  if (super_klass != r0 || UseCompressedOops) {
    if (!IS_A_TEMP(r0))   pushed_registers += r0;
  }

  push(pushed_registers, sp);

#ifndef PRODUCT
  mov(rscratch2, (address)&SharedRuntime::_partial_subtype_ctr);
  Address pst_counter_addr(rscratch2);
  ldr(rscratch1, pst_counter_addr);
  add(rscratch1, rscratch1, 1);
  str(rscratch1, pst_counter_addr);
#endif //PRODUCT

  // We will consult the secondary-super array.
  ldr(r5, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  ldrw(r2, Address(r5, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  add(r5, r5, Array<Klass*>::base_offset_in_bytes());

  cmp(sp, zr); // Clear Z flag; SP is never zero
  // Scan R2 words at [R5] for an occurrence of R0.
  // Set NZ/Z based on last compare.
  repne_scan(r5, r0, r2, rscratch1);

  // Unspill the temp. registers:
  pop(pushed_registers, sp);

  br(Assembler::NE, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
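  // (Subsequent fast-path checks against this super_klass will now hit
  //  the one-element secondary_super_cache without rescanning the array.)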
  str(super_klass, super_cache_addr);

  if (L_success != &L_fallthrough) {
    b(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s", reg->name(), s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));

  mov(r0, reg);
  mov(rscratch1, (address)b);

  // call indirectly to solve generation ordering problem
  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  ldr(rscratch2, Address(rscratch2));
  blr(rscratch2);

  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s", s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  stp(r0, rscratch1, Address(pre(sp, -2 * wordSize)));
  stp(rscratch2, lr, Address(pre(sp, -2 * wordSize)));

  // addr may contain sp so we will have to adjust it based on the
  // pushes that we just did.
  if (addr.uses(sp)) {
    lea(r0, addr);
    ldr(r0, Address(r0, 4 * wordSize));
  } else {
    ldr(r0, addr);
  }
  mov(rscratch1, (address)b);

  // call indirectly to solve generation ordering problem
  lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  ldr(rscratch2, Address(rscratch2));
  blr(rscratch2);

  ldp(rscratch2, lr, Address(post(sp, 2 * wordSize)));
  ldp(r0, rscratch1, Address(post(sp, 2 * wordSize)));

  BLOCK_COMMENT("} verify_oop_addr");
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize
                   + offset);
  } else {
    add(rscratch1, esp, arg_slot.as_register(),
        ext::uxtx, exact_log2(stackElementSize));
    return Address(rscratch1, offset);
  }
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  call_VM_leaf_base1(entry_point, number_of_arguments, 0, ret_type_integral, retaddr);
}

void MacroAssembler::call_VM_leaf_base1(address entry_point,
                                        int number_of_gp_arguments,
                                        int number_of_fp_arguments,
                                        ret_type type,
                                        Label *retaddr) {
  Label E, L;

  stp(rscratch1, rmethod, Address(pre(sp, -2 * wordSize)));

  // We add 1 to number_of_arguments because the thread in arg0 is
  // not counted
  mov(rscratch1, entry_point);
  blrt(rscratch1, number_of_gp_arguments + 1, number_of_fp_arguments, type);
  if (retaddr)
    bind(*retaddr);

  ldp(rscratch1, rmethod, Address(post(sp, 2 * wordSize)));
  maybe_isb();
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert(arg_0 != c_rarg3, "smashed arg");
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any registers
    // NOTE: this is plenty to provoke a segv
    ldr(zr, Address(reg));
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

// MacroAssembler protected routines needed to implement
// public methods

void MacroAssembler::mov(Register r, Address dest) {
  InstructionMark im(this);
  code_section()->relocate(inst_mark(), dest.rspec());
  u_int64_t imm64 = (u_int64_t)dest.target();
  movptr(r, imm64);
}

// Move a constant pointer into r.  In AArch64 mode the virtual
// address space is 48 bits in size, so we only need three
// instructions to create a patchable instruction sequence that can
// reach anywhere.
void MacroAssembler::movptr(Register r, uintptr_t imm64) {
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64);
    block_comment(buffer);
  }
#endif
  assert(imm64 < (1ul << 48), "48-bit overflow in address constant");
  movz(r, imm64 & 0xffff);
  imm64 >>= 16;
  movk(r, imm64 & 0xffff, 16);
  imm64 >>= 16;
  movk(r, imm64 & 0xffff, 32);
}

// Macro to mov replicated immediate to vector register.
//  Vd will get the following values for different arrangements in T
//   imm32 == hex 000000gh  T8B:  Vd = ghghghghghghghgh
//   imm32 == hex 000000gh  T16B: Vd = ghghghghghghghghghghghghghghghgh
//   imm32 == hex 0000efgh  T4H:  Vd = efghefghefghefgh
//   imm32 == hex 0000efgh  T8H:  Vd = efghefghefghefghefghefghefghefgh
//   imm32 == hex abcdefgh  T2S:  Vd = abcdefghabcdefgh
//   imm32 == hex abcdefgh  T4S:  Vd = abcdefghabcdefghabcdefghabcdefgh
//   T1D/T2D: invalid
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) {
  assert(T != T1D && T != T2D, "invalid arrangement");
  if (T == T8B || T == T16B) {
    assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
    movi(Vd, T, imm32 & 0xff, 0);
    return;
  }
  u_int32_t nimm32 = ~imm32;
  if (T == T4H || T == T8H) {
    assert((imm32  & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
    imm32 &= 0xffff;
    nimm32 &= 0xffff;
  }
  u_int32_t x = imm32;
  int movi_cnt = 0;
  int movn_cnt = 0;
  while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
  x = nimm32;
  while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
  if (movn_cnt < movi_cnt) imm32 = nimm32;
  unsigned lsl = 0;
  while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
  if (movn_cnt < movi_cnt)
    mvni(Vd, T, imm32 & 0xff, lsl);
  else
    movi(Vd, T, imm32 & 0xff, lsl);
  imm32 >>= 8; lsl += 8;
  while (imm32) {
    while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
    if (movn_cnt < movi_cnt)
      bici(Vd, T, imm32 & 0xff, lsl);
    else
      orri(Vd, T, imm32 & 0xff, lsl);
    lsl += 8; imm32 >>= 8;
  }
}

void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
{
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%"PRIX64, imm64);
    block_comment(buffer);
  }
#endif
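  // Strategy (added note): prefer a single ORR with a logical immediate;
  // otherwise split imm64 into four 16-bit halves and synthesize it from
  // MOVZ or MOVN plus up to three MOVKs, choosing whichever base leaves
  // fewer halves to patch.  Illustrative example: 0xffffffff00001234 has
  // neg_count == 2, so it becomes 'movn dst, #0xedcb' then
  // 'movk dst, #0, lsl #16'.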
  if (operand_valid_for_logical_immediate(false, imm64)) {
    orr(dst, zr, imm64);
  } else {
    // we can use a combination of MOVZ or MOVN with
    // MOVK to build up the constant
    u_int64_t imm_h[4];
    int zero_count = 0;
    int neg_count = 0;
    int i;
    for (i = 0; i < 4; i++) {
      imm_h[i] = ((imm64 >> (i * 16)) & 0xffffL);
      if (imm_h[i] == 0) {
        zero_count++;
      } else if (imm_h[i] == 0xffffL) {
        neg_count++;
      }
    }
    if (zero_count == 4) {
      // one MOVZ will do
      movz(dst, 0);
    } else if (neg_count == 4) {
      // one MOVN will do
      movn(dst, 0);
    } else if (zero_count == 3) {
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (u_int32_t)imm_h[i], (i << 4));
          break;
        }
      }
    } else if (neg_count == 3) {
      // one MOVN will do
      for (int i = 0; i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
          break;
        }
      }
    } else if (zero_count == 2) {
      // one MOVZ and one MOVK will do
      for (i = 0; i < 3; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (u_int32_t)imm_h[i], (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0L) {
          movk(dst, (u_int32_t)imm_h[i], (i << 4));
        }
      }
    } else if (neg_count == 2) {
      // one MOVN and one MOVK will do
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movk(dst, (u_int32_t)imm_h[i], (i << 4));
        }
      }
    } else if (zero_count == 1) {
      // one MOVZ and two MOVKs will do
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0L) {
          movz(dst, (u_int32_t)imm_h[i], (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0x0L) {
          movk(dst, (u_int32_t)imm_h[i], (i << 4));
        }
      }
    } else if (neg_count == 1) {
      // one MOVN and two MOVKs will do
      for (i = 0; i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movn(dst, (u_int32_t)imm_h[i] ^ 0xffffL, (i << 4));
          i++;
          break;
        }
      }
      for (;i < 4; i++) {
        if (imm_h[i] != 0xffffL) {
          movk(dst, (u_int32_t)imm_h[i], (i << 4));
        }
      }
    } else {
      // use a MOVZ and 3 MOVKs (makes it easier to debug)
      movz(dst, (u_int32_t)imm_h[0], 0);
      for (i = 1; i < 4; i++) {
        movk(dst, (u_int32_t)imm_h[i], (i << 4));
      }
    }
  }
}

void MacroAssembler::mov_immediate32(Register dst, u_int32_t imm32)
{
#ifndef PRODUCT
  {
    char buffer[64];
    snprintf(buffer, sizeof(buffer), "0x%"PRIX32, imm32);
    block_comment(buffer);
  }
#endif
  if (operand_valid_for_logical_immediate(true, imm32)) {
    orrw(dst, zr, imm32);
  } else {
    // we can use MOVZ, MOVN or two calls to MOVK to build up the
    // constant
    u_int32_t imm_h[2];
    imm_h[0] = imm32 & 0xffff;
    imm_h[1] = ((imm32 >> 16) & 0xffff);
    if (imm_h[0] == 0) {
      movzw(dst, imm_h[1], 16);
    } else if (imm_h[0] == 0xffff) {
      movnw(dst, imm_h[1] ^ 0xffff, 16);
    } else if (imm_h[1] == 0) {
      movzw(dst, imm_h[0], 0);
    } else if (imm_h[1] == 0xffff) {
      movnw(dst, imm_h[0] ^ 0xffff, 0);
    } else {
      // use a MOVZ and MOVK (makes it easier to debug)
      movzw(dst, imm_h[0], 0);
      movkw(dst, imm_h[1], 16);
    }
  }
}

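// (Illustrative example for mov_immediate32: 0xffff0001 has a high half
//  of all ones, so a single 'movnw dst, #0xfffe' — dst = ~0xfffe — suffices.)
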
// Form an address from base + offset in Rd.  Rd may or may
// not actually be used: you must use the Address that is returned.
// It is up to you to ensure that the shift provided matches the size
// of your data.
Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) {
  if (Address::offset_ok_for_immed(byte_offset, shift))
    // It fits; no need for any heroics
    return Address(base, byte_offset);

  // Don't do anything clever with negative or misaligned offsets
  unsigned mask = (1 << shift) - 1;
  if (byte_offset < 0 || byte_offset & mask) {
    mov(Rd, byte_offset);
    add(Rd, base, Rd);
    return Address(Rd);
  }

  // See if we can do this with two 12-bit offsets
  {
    unsigned long word_offset = byte_offset >> shift;
    unsigned long masked_offset = word_offset & 0xfff000;
    if (Address::offset_ok_for_immed(word_offset - masked_offset)
        && Assembler::operand_valid_for_add_sub_immediate(masked_offset << shift)) {
      add(Rd, base, masked_offset << shift);
      word_offset -= masked_offset;
      return Address(Rd, word_offset << shift);
    }
  }

  // Do it the hard way
  mov(Rd, byte_offset);
  add(Rd, base, Rd);
  return Address(Rd);
}

void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
  Label retry_load;
  bind(retry_load);
  // flush and load exclusive from the memory location
  ldxrw(tmp, counter_addr);
  addw(tmp, tmp, 1);
  // if we store+flush with no intervening write tmp will be zero
  stxrw(tmp, tmp, counter_addr);
  cbnzw(tmp, retry_load);
}


int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
                                    bool want_remainder, Register scratch)
{
  // Full implementation of Java idiv and irem.  The function
  // returns the (pc) offset of the div instruction - may be needed
  // for implicit exceptions.
  //
  // constraint : ra/rb =/= scratch
  //         normal case
  //
  // input : ra: dividend
  //         rb: divisor
  //
  // result: either
  //         quotient  (= ra idiv rb)
  //         remainder (= ra irem rb)

  assert(ra != scratch && rb != scratch, "reg cannot be scratch");

  int idivl_offset = offset();
  if (! want_remainder) {
    sdivw(result, ra, rb);
  } else {
    sdivw(scratch, ra, rb);
    Assembler::msubw(result, scratch, rb, ra);
  }

  return idivl_offset;
}

int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
                                    bool want_remainder, Register scratch)
{
  // Full implementation of Java ldiv and lrem.  The function
  // returns the (pc) offset of the div instruction - may be needed
  // for implicit exceptions.
  //
  // constraint : ra/rb =/= scratch
  //         normal case
  //
  // input : ra: dividend
  //         rb: divisor
  //
  // result: either
  //         quotient  (= ra idiv rb)
  //         remainder (= ra irem rb)

  assert(ra != scratch && rb != scratch, "reg cannot be scratch");

  int idivq_offset = offset();
want_remainder) { 1614 sdiv(result, ra, rb); 1615 } else { 1616 sdiv(scratch, ra, rb); 1617 Assembler::msub(result, scratch, rb, ra); 1618 } 1619 1620 return idivq_offset; 1621 } 1622 1623 // MacroAssembler routines found actually to be needed 1624 1625 void MacroAssembler::push(Register src) 1626 { 1627 str(src, Address(pre(esp, -1 * wordSize))); 1628 } 1629 1630 void MacroAssembler::pop(Register dst) 1631 { 1632 ldr(dst, Address(post(esp, 1 * wordSize))); 1633 } 1634 1635 // Note: load_unsigned_short used to be called load_unsigned_word. 1636 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 1637 int off = offset(); 1638 ldrh(dst, src); 1639 return off; 1640 } 1641 1642 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 1643 int off = offset(); 1644 ldrb(dst, src); 1645 return off; 1646 } 1647 1648 int MacroAssembler::load_signed_short(Register dst, Address src) { 1649 int off = offset(); 1650 ldrsh(dst, src); 1651 return off; 1652 } 1653 1654 int MacroAssembler::load_signed_byte(Register dst, Address src) { 1655 int off = offset(); 1656 ldrsb(dst, src); 1657 return off; 1658 } 1659 1660 int MacroAssembler::load_signed_short32(Register dst, Address src) { 1661 int off = offset(); 1662 ldrshw(dst, src); 1663 return off; 1664 } 1665 1666 int MacroAssembler::load_signed_byte32(Register dst, Address src) { 1667 int off = offset(); 1668 ldrsbw(dst, src); 1669 return off; 1670 } 1671 1672 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 1673 switch (size_in_bytes) { 1674 case 8: ldr(dst, src); break; 1675 case 4: ldrw(dst, src); break; 1676 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 1677 case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 1678 default: ShouldNotReachHere(); 1679 } 1680 } 1681 1682 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 1683 switch (size_in_bytes) { 1684 case 8: str(src, dst); break; 1685 case 4: strw(src, dst); break; 1686 case 2: strh(src, dst); break; 1687 case 1: strb(src, dst); break; 1688 default: ShouldNotReachHere(); 1689 } 1690 } 1691 1692 void MacroAssembler::decrementw(Register reg, int value) 1693 { 1694 if (value < 0) { incrementw(reg, -value); return; } 1695 if (value == 0) { return; } 1696 if (value < (1 << 12)) { subw(reg, reg, value); return; } 1697 /* else */ { 1698 guarantee(reg != rscratch2, "invalid dst for register decrement"); 1699 movw(rscratch2, (unsigned)value); 1700 subw(reg, reg, rscratch2); 1701 } 1702 } 1703 1704 void MacroAssembler::decrement(Register reg, int value) 1705 { 1706 if (value < 0) { increment(reg, -value); return; } 1707 if (value == 0) { return; } 1708 if (value < (1 << 12)) { sub(reg, reg, value); return; } 1709 /* else */ { 1710 assert(reg != rscratch2, "invalid dst for register decrement"); 1711 mov(rscratch2, (unsigned long)value); 1712 sub(reg, reg, rscratch2); 1713 } 1714 } 1715 1716 void MacroAssembler::decrementw(Address dst, int value) 1717 { 1718 assert(!dst.uses(rscratch1), "invalid dst for address decrement"); 1719 ldrw(rscratch1, dst); 1720 decrementw(rscratch1, value); 1721 strw(rscratch1, dst); 1722 } 1723 1724 void MacroAssembler::decrement(Address dst, int value) 1725 { 1726 assert(!dst.uses(rscratch1), "invalid address for decrement"); 1727 ldr(rscratch1, dst); 1728 decrement(rscratch1, value); 1729 str(rscratch1, dst); 1730 } 1731 1732 void MacroAssembler::incrementw(Register reg, int value) 1733 { 1734 if (value < 0) { decrementw(reg, -value); return; } 1735 if (value == 0) { return; } 1736 if (value < (1 << 12)) { addw(reg, reg, value); return; } 1737 /* else */ { 1738 assert(reg != rscratch2, "invalid dst for register increment"); 1739 movw(rscratch2, (unsigned)value); 1740 addw(reg, reg, rscratch2); 1741 } 1742 } 1743 1744 void MacroAssembler::increment(Register reg, int value) 1745 { 1746 if (value < 0) { decrement(reg, -value); return; } 1747 if (value == 0) { return; } 1748 if (value < (1 << 12)) { add(reg, reg, value); return; } 1749 /* else */ { 1750 assert(reg != rscratch2, "invalid dst for register increment"); 1751 movw(rscratch2, (unsigned)value); 1752 add(reg, reg, rscratch2); 1753 } 1754 } 1755 1756 void MacroAssembler::incrementw(Address dst, int value) 1757 { 1758 assert(!dst.uses(rscratch1), "invalid dst for address increment"); 1759 ldrw(rscratch1, dst); 1760 incrementw(rscratch1, value); 1761 strw(rscratch1, dst); 1762 } 1763 1764 void MacroAssembler::increment(Address dst, int value) 1765 { 1766 assert(!dst.uses(rscratch1), "invalid dst for address increment"); 1767 ldr(rscratch1, dst); 1768 increment(rscratch1, value); 1769 str(rscratch1, dst); 1770 } 1771 1772 1773 void MacroAssembler::pusha() { 1774 push(0x7fffffff, sp); 1775 } 1776 1777 void MacroAssembler::popa() { 1778 pop(0x7fffffff, sp); 1779 } 1780 1781 // Push lots of registers in the bit set supplied. Don't push sp. 
// Return the number of words pushed
int MacroAssembler::push(unsigned int bitset, Register stack) {
  int words_pushed = 0;

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = 0;
  for (int reg = 0; reg <= 30; reg++) {
    if (1 & bitset)
      regs[count++] = reg;
    bitset >>= 1;
  }
  regs[count++] = zr->encoding_nocheck();
  count &= ~1;  // Only push an even number of regs

  if (count) {
    stp(as_Register(regs[0]), as_Register(regs[1]),
        Address(pre(stack, -count * wordSize)));
    words_pushed += 2;
  }
  for (int i = 2; i < count; i += 2) {
    stp(as_Register(regs[i]), as_Register(regs[i+1]),
        Address(stack, i * wordSize));
    words_pushed += 2;
  }

  assert(words_pushed == count, "oops, pushed != count");

  return count;
}

int MacroAssembler::pop(unsigned int bitset, Register stack) {
  int words_popped = 0;

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = 0;
  for (int reg = 0; reg <= 30; reg++) {
    if (1 & bitset)
      regs[count++] = reg;
    bitset >>= 1;
  }
  regs[count++] = zr->encoding_nocheck();
  count &= ~1;

  for (int i = 2; i < count; i += 2) {
    ldp(as_Register(regs[i]), as_Register(regs[i+1]),
        Address(stack, i * wordSize));
    words_popped += 2;
  }
  if (count) {
    ldp(as_Register(regs[0]), as_Register(regs[1]),
        Address(post(stack, count * wordSize)));
    words_popped += 2;
  }

  assert(words_popped == count, "oops, popped != count");

  return count;
}
#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
#if 0
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(1 << rscratch1->encoding(), sp); // cmpptr trashes rscratch1
    cmpptr(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    br(Assembler::EQ, ok);
    stop(msg);
    bind(ok);
    pop(1 << rscratch1->encoding(), sp);
  }
#endif
}
#endif

void MacroAssembler::stop(const char* msg) {
  address ip = pc();
  pusha();
  mov(c_rarg0, (address)msg);
  mov(c_rarg1, (address)ip);
  mov(c_rarg2, sp);
  mov(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
  // call(c_rarg3);
  blrt(c_rarg3, 3, 0, 1);
  hlt(0);
}

// If a constant does not fit in an immediate field, generate some
// number of MOV instructions and then perform the operation.
void MacroAssembler::wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm,
                                           add_sub_imm_insn insn1,
                                           add_sub_reg_insn insn2) {
  assert(Rd != zr, "Rd = zr and not setting flags?");
  if (operand_valid_for_add_sub_immediate((int)imm)) {
    (this->*insn1)(Rd, Rn, imm);
  } else {
    if (uabs(imm) < (1 << 24)) {
      (this->*insn1)(Rd, Rn, imm & -(1 << 12));
      (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1));
    } else {
      assert_different_registers(Rd, Rn);
      mov(Rd, (uint64_t)imm);
      (this->*insn2)(Rd, Rn, Rd, LSL, 0);
    }
  }
}
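// Worked example (illustrative only; values chosen to hit the middle branch):
// wrapping an add of imm == 0x123456 fails the single-immediate test but
// satisfies uabs(imm) < (1 << 24), so the wrapper emits two immediate adds:
//   add Rd, Rn, #0x123000    // imm & -(1 << 12), a 12-bit value shifted by 12
//   add Rd, Rd, #0x456       // imm & ((1 << 12) - 1)
// Anything larger falls back to a full mov into Rd followed by the
// register-register form (insn2).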
// Separate version which sets the flags. Optimisations are more restricted
// because we must set the flags correctly.
void MacroAssembler::wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm,
                                             add_sub_imm_insn insn1,
                                             add_sub_reg_insn insn2) {
  if (operand_valid_for_add_sub_immediate((int)imm)) {
    (this->*insn1)(Rd, Rn, imm);
  } else {
    assert_different_registers(Rd, Rn);
    assert(Rd != zr, "overflow in immediate operand");
    mov(Rd, (uint64_t)imm);
    (this->*insn2)(Rd, Rn, Rd, LSL, 0);
  }
}


void MacroAssembler::add(Register Rd, Register Rn, RegisterOrConstant increment) {
  if (increment.is_register()) {
    add(Rd, Rn, increment.as_register());
  } else {
    add(Rd, Rn, increment.as_constant());
  }
}

void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment) {
  if (increment.is_register()) {
    addw(Rd, Rn, increment.as_register());
  } else {
    addw(Rd, Rn, increment.as_constant());
  }
}

void MacroAssembler::reinit_heapbase()
{
  if (UseCompressedOops) {
    if (Universe::is_fully_initialized()) {
      mov(rheapbase, Universe::narrow_ptrs_base());
    } else {
      lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
      ldr(rheapbase, Address(rheapbase));
    }
  }
}

// this simulates the behaviour of the x86 cmpxchg instruction using a
// load linked/store conditional pair. we use the acquire/release
// versions of these instructions so that we flush pending writes as
// per Java semantics.

// n.b. the x86 version assumes the old value to be compared against is
// in rax and updates rax with the value located in memory if the
// cmpxchg fails. we supply a register for the old value explicitly

// the aarch64 load linked/store conditional instructions do not
// accept an offset. so, unlike x86, we must provide a plain register
// to identify the memory word to be compared/exchanged rather than a
// register+offset Address.

void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                                Label &succeed, Label *fail) {
  // oldv holds comparison value
  // newv holds value to write in exchange
  // addr identifies memory word to compare against/update
  // tmp returns 0/1 for success/failure
  Label retry_load, nope;

  bind(retry_load);
  // flush and load exclusive from the memory location
  // and fail if it is not what we expect
  ldaxr(tmp, addr);
  cmp(tmp, oldv);
  br(Assembler::NE, nope);
  // if we store+flush with no intervening write tmp will be zero
  stlxr(tmp, newv, addr);
  cbzw(tmp, succeed);
  // retry so we only ever return after a load fails to compare
  // ensures we don't return a stale value after a failed write.
  b(retry_load);
  // if the memory word differs we return it in oldv and signal a fail
  bind(nope);
  membar(AnyAny);
  mov(oldv, tmp);
  if (fail)
    b(*fail);
}
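// Rough C-level sketch of the sequence above (illustrative only; the real
// ordering guarantees come from the acquire/release exclusive instructions):
//
//   for (;;) {
//     tmp = load_acquire_exclusive(addr);
//     if (tmp != oldv) { full_fence(); oldv = tmp; goto fail_or_fall_through; }
//     if (store_release_exclusive(addr, newv) == 0) goto succeed;  // 0 == success
//   }
//
// i.e. control only leaves the loop after a successful exchange or a genuine
// value mismatch, never on a spurious store-exclusive failure.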
void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                              Label &succeed, Label *fail) {
  // oldv holds comparison value
  // newv holds value to write in exchange
  // addr identifies memory word to compare against/update
  // tmp returns 0/1 for success/failure
  Label retry_load, nope;

  bind(retry_load);
  // flush and load exclusive from the memory location
  // and fail if it is not what we expect
  ldaxrw(tmp, addr);
  cmp(tmp, oldv);
  br(Assembler::NE, nope);
  // if we store+flush with no intervening write tmp will be zero
  stlxrw(tmp, newv, addr);
  cbzw(tmp, succeed);
  // retry so we only ever return after a load fails to compare
  // ensures we don't return a stale value after a failed write.
  b(retry_load);
  // if the memory word differs we return it in oldv and signal a fail
  bind(nope);
  membar(AnyAny);
  mov(oldv, tmp);
  if (fail)
    b(*fail);
}

static bool different(Register a, RegisterOrConstant b, Register c) {
  if (b.is_constant())
    return a != c;
  else
    return a != b.as_register() && a != c && b.as_register() != c;
}

#define ATOMIC_OP(LDXR, OP, STXR)                                       \
void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
  Register result = rscratch2;                                          \
  if (prev->is_valid())                                                 \
    result = different(prev, incr, addr) ? prev : rscratch2;            \
                                                                        \
  Label retry_load;                                                     \
  bind(retry_load);                                                     \
  LDXR(result, addr);                                                   \
  OP(rscratch1, result, incr);                                          \
  STXR(rscratch1, rscratch1, addr);                                     \
  cbnzw(rscratch1, retry_load);                                         \
  if (prev->is_valid() && prev != result)                               \
    mov(prev, result);                                                  \
}

ATOMIC_OP(ldxr, add, stxr)
ATOMIC_OP(ldxrw, addw, stxrw)

#undef ATOMIC_OP
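// For reference, ATOMIC_OP(ldxr, add, stxr) above defines atomic_add(), whose
// emitted loop is essentially (illustrative expansion, not literal output):
//
//   retry: ldxr  result, [addr]                 // load exclusive
//          add   rscratch1, result, incr
//          stxr  w(rscratch1), rscratch1, [addr] // status 0 if the store hit
//          cbnz  w(rscratch1), retry
//
// `result` is routed to `prev` when that register is valid and does not alias
// incr or addr, so the caller receives the pre-increment value.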
#define ATOMIC_XCHG(OP, LDXR, STXR)                                     \
void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
  Register result = rscratch2;                                          \
  if (prev->is_valid())                                                 \
    result = different(prev, newv, addr) ? prev : rscratch2;            \
                                                                        \
  Label retry_load;                                                     \
  bind(retry_load);                                                     \
  LDXR(result, addr);                                                   \
  STXR(rscratch1, newv, addr);                                          \
  cbnzw(rscratch1, retry_load);                                         \
  if (prev->is_valid() && prev != result)                               \
    mov(prev, result);                                                  \
}

ATOMIC_XCHG(xchg, ldxr, stxr)
ATOMIC_XCHG(xchgw, ldxrw, stxrw)

#undef ATOMIC_XCHG

void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
    thread = rthread;
  }
  assert(t1->is_valid(), "need temp reg");

  ldr(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset())));
  if (var_size_in_bytes->is_valid()) {
    add(t1, t1, var_size_in_bytes);
  } else {
    add(t1, t1, con_size_in_bytes);
  }
  str(t1, Address(thread, in_bytes(JavaThread::allocated_bytes_offset())));
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" r0 = 0x%016lx", regs[0]);
      tty->print_cr(" r1 = 0x%016lx", regs[1]);
      tty->print_cr(" r2 = 0x%016lx", regs[2]);
      tty->print_cr(" r3 = 0x%016lx", regs[3]);
      tty->print_cr(" r4 = 0x%016lx", regs[4]);
      tty->print_cr(" r5 = 0x%016lx", regs[5]);
      tty->print_cr(" r6 = 0x%016lx", regs[6]);
      tty->print_cr(" r7 = 0x%016lx", regs[7]);
      tty->print_cr(" r8 = 0x%016lx", regs[8]);
      tty->print_cr(" r9 = 0x%016lx", regs[9]);
      tty->print_cr("r10 = 0x%016lx", regs[10]);
      tty->print_cr("r11 = 0x%016lx", regs[11]);
      tty->print_cr("r12 = 0x%016lx", regs[12]);
      tty->print_cr("r13 = 0x%016lx", regs[13]);
      tty->print_cr("r14 = 0x%016lx", regs[14]);
      tty->print_cr("r15 = 0x%016lx", regs[15]);
      tty->print_cr("r16 = 0x%016lx", regs[16]);
      tty->print_cr("r17 = 0x%016lx", regs[17]);
      tty->print_cr("r18 = 0x%016lx", regs[18]);
      tty->print_cr("r19 = 0x%016lx", regs[19]);
      tty->print_cr("r20 = 0x%016lx", regs[20]);
      tty->print_cr("r21 = 0x%016lx", regs[21]);
      tty->print_cr("r22 = 0x%016lx", regs[22]);
      tty->print_cr("r23 = 0x%016lx", regs[23]);
      tty->print_cr("r24 = 0x%016lx", regs[24]);
      tty->print_cr("r25 = 0x%016lx", regs[25]);
      tty->print_cr("r26 = 0x%016lx", regs[26]);
      tty->print_cr("r27 = 0x%016lx", regs[27]);
      tty->print_cr("r28 = 0x%016lx", regs[28]);
      tty->print_cr("r30 = 0x%016lx", regs[30]);
      tty->print_cr("r31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}
#ifdef BUILTIN_SIM
// routine to generate an x86 prolog for a stub function which
// bootstraps into the generated ARM code which directly follows the
// stub
//
// the argument encodes the number of general and fp registers
// passed by the caller and the calling convention (currently just
// the number of general registers and assumes C argument passing)

extern "C" {
  int aarch64_stub_prolog_size();
  void aarch64_stub_prolog();
  void aarch64_prolog();
}

void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_type,
                                   address *prolog_ptr)
{
  int calltype = (((ret_type & 0x3) << 8) |
                  ((fp_arg_count & 0xf) << 4) |
                  (gp_arg_count & 0xf));

  // the addresses for the x86 to ARM entry code we need to use
  address start = pc();
  // printf("start = %lx\n", start);
  int byteCount = aarch64_stub_prolog_size();
  // printf("byteCount = %x\n", byteCount);
  int instructionCount = (byteCount + 3) / 4;
  // printf("instructionCount = %x\n", instructionCount);
  for (int i = 0; i < instructionCount; i++) {
    nop();
  }

  memcpy(start, (void*)aarch64_stub_prolog, byteCount);

  // write the address of the setup routine and the call format at the
  // end of the copied code
  u_int64_t *patch_end = (u_int64_t *)(start + byteCount);
  if (prolog_ptr)
    patch_end[-2] = (u_int64_t)prolog_ptr;
  patch_end[-1] = calltype;
}
#endif

void MacroAssembler::push_CPU_state() {
  push(0x3fffffff, sp);         // integer registers except lr & sp

  for (int i = 30; i >= 0; i -= 2)
    stpd(as_FloatRegister(i), as_FloatRegister(i+1),
         Address(pre(sp, -2 * wordSize)));
}

void MacroAssembler::pop_CPU_state() {
  for (int i = 0; i < 32; i += 2)
    ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
         Address(post(sp, 2 * wordSize)));

  pop(0x3fffffff, sp);         // integer registers except lr & sp
}

/**
 * Helpers for multiply_to_len().
 */
void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                                     Register src1, Register src2) {
  adds(dest_lo, dest_lo, src1);
  adc(dest_hi, dest_hi, zr);
  adds(dest_lo, dest_lo, src2);
  adc(final_dest_hi, dest_hi, zr);
}

// Generate an address from (r + r1 extend offset).  "size" is the
// size of the operand.  The result may be in rscratch2.
Address MacroAssembler::offsetted_address(Register r, Register r1,
                                          Address::extend ext, int offset, int size) {
  if (offset || (ext.shift() % size != 0)) {
    lea(rscratch2, Address(r, r1, ext));
    return Address(rscratch2, offset);
  } else {
    return Address(r, r1, ext);
  }
}
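// Usage sketch, taken from the multiply loops below: the call
//   offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong)
// has ext.shift() == 2, which is not a multiple of the 8-byte operand size,
// so the helper first leas z + (kdx << 2) into rscratch2 and returns
// Address(rscratch2, 0).  When the extend's shift does divide the operand
// size and the offset is zero, the scaled register form is returned directly.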
/**
 * Multiply 64 bit by 64 bit first loop.
 */
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                                           Register y, Register y_idx, Register z,
                                           Register carry, Register product,
                                           Register idx, Register kdx) {
  //
  //  jlong carry, x[], y[], z[];
  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //    huge_128 product = y[idx] * x[xstart] + carry;
  //    z[kdx] = (jlong)product;
  //    carry  = (jlong)(product >>> 64);
  //  }
  //  z[xstart] = carry;
  //

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  subsw(xstart, xstart, 1);
  br(Assembler::MI, L_one_x);

  lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt)));
  ldr(x_xstart, Address(rscratch1));
  ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian

  bind(L_first_loop);
  subsw(idx, idx, 1);
  br(Assembler::MI, L_first_loop_exit);
  subsw(idx, idx, 1);
  br(Assembler::MI, L_one_y);
  lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
  ldr(y_idx, Address(rscratch1));
  ror(y_idx, y_idx, 32); // convert big-endian to little-endian
  bind(L_multiply);

  // AArch64 has a multiply-accumulate instruction that we can't use
  // here because it has no way to process carries, so we have to use
  // separate add and adc instructions.  Bah.
  umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product
  mul(product, x_xstart, y_idx);
  adds(product, product, carry);
  adc(carry, rscratch1, zr);   // x_xstart * y_idx + carry -> carry:product

  subw(kdx, kdx, 2);
  ror(product, product, 32); // back to big-endian
  str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong));

  b(L_first_loop);

  bind(L_one_y);
  ldrw(y_idx, Address(y, 0));
  b(L_multiply);

  bind(L_one_x);
  ldrw(x_xstart, Address(x, 0));
  b(L_first_loop);

  bind(L_first_loop_exit);
}

/**
 * Multiply 128 bit by 128 bit. Unrolled inner loop.
2286 * 2287 */ 2288 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, 2289 Register carry, Register carry2, 2290 Register idx, Register jdx, 2291 Register yz_idx1, Register yz_idx2, 2292 Register tmp, Register tmp3, Register tmp4, 2293 Register tmp6, Register product_hi) { 2294 2295 // jlong carry, x[], y[], z[]; 2296 // int kdx = ystart+1; 2297 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop 2298 // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; 2299 // jlong carry2 = (jlong)(tmp3 >>> 64); 2300 // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; 2301 // carry = (jlong)(tmp4 >>> 64); 2302 // z[kdx+idx+1] = (jlong)tmp3; 2303 // z[kdx+idx] = (jlong)tmp4; 2304 // } 2305 // idx += 2; 2306 // if (idx > 0) { 2307 // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; 2308 // z[kdx+idx] = (jlong)yz_idx1; 2309 // carry = (jlong)(yz_idx1 >>> 64); 2310 // } 2311 // 2312 2313 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; 2314 2315 lsrw(jdx, idx, 2); 2316 2317 bind(L_third_loop); 2318 2319 subsw(jdx, jdx, 1); 2320 br(Assembler::MI, L_third_loop_exit); 2321 subw(idx, idx, 4); 2322 2323 lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); 2324 2325 ldp(yz_idx2, yz_idx1, Address(rscratch1, 0)); 2326 2327 lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt))); 2328 2329 ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian 2330 ror(yz_idx2, yz_idx2, 32); 2331 2332 ldp(rscratch2, rscratch1, Address(tmp6, 0)); 2333 2334 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 2335 umulh(tmp4, product_hi, yz_idx1); 2336 2337 ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian 2338 ror(rscratch2, rscratch2, 32); 2339 2340 mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp 2341 umulh(carry2, product_hi, yz_idx2); 2342 2343 // propagate sum of both multiplications into carry:tmp4:tmp3 2344 adds(tmp3, tmp3, carry); 2345 adc(tmp4, tmp4, zr); 2346 adds(tmp3, tmp3, rscratch1); 2347 adcs(tmp4, tmp4, tmp); 2348 adc(carry, carry2, zr); 2349 adds(tmp4, tmp4, rscratch2); 2350 adc(carry, carry, zr); 2351 2352 ror(tmp3, tmp3, 32); // convert little-endian to big-endian 2353 ror(tmp4, tmp4, 32); 2354 stp(tmp4, tmp3, Address(tmp6, 0)); 2355 2356 b(L_third_loop); 2357 bind (L_third_loop_exit); 2358 2359 andw (idx, idx, 0x3); 2360 cbz(idx, L_post_third_loop_done); 2361 2362 Label L_check_1; 2363 subsw(idx, idx, 2); 2364 br(Assembler::MI, L_check_1); 2365 2366 lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt))); 2367 ldr(yz_idx1, Address(rscratch1, 0)); 2368 ror(yz_idx1, yz_idx1, 32); 2369 mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 2370 umulh(tmp4, product_hi, yz_idx1); 2371 lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt))); 2372 ldr(yz_idx2, Address(rscratch1, 0)); 2373 ror(yz_idx2, yz_idx2, 32); 2374 2375 add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2); 2376 2377 ror(tmp3, tmp3, 32); 2378 str(tmp3, Address(rscratch1, 0)); 2379 2380 bind (L_check_1); 2381 2382 andw (idx, idx, 0x1); 2383 subsw(idx, idx, 1); 2384 br(Assembler::MI, L_post_third_loop_done); 2385 ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt))); 2386 mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 2387 umulh(carry2, tmp4, product_hi); 2388 ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt))); 2389 2390 add2_with_carry(carry2, tmp3, tmp4, carry); 2391 2392 strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt))); 2393 
extr(carry, carry2, tmp3, 32);

  bind(L_post_third_loop_done);
}

/**
 * Code for BigInteger::multiplyToLen() intrinsic.
 *
 * r0: x
 * r1: xlen
 * r2: y
 * r3: ylen
 * r4: z
 * r5: zlen
 * r10: tmp1
 * r11: tmp2
 * r12: tmp3
 * r13: tmp4
 * r14: tmp5
 * r15: tmp6
 * r16: tmp7
 *
 */
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                                     Register z, Register zlen,
                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                                     Register tmp5, Register tmp6, Register product_hi) {

  assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);

  const Register idx = tmp1;
  const Register kdx = tmp2;
  const Register xstart = tmp3;

  const Register y_idx = tmp4;
  const Register carry = tmp5;
  const Register product = xlen;
  const Register x_xstart = zlen;  // reuse register

  // First Loop.
  //
  //  final static long LONG_MASK = 0xffffffffL;
  //  int xstart = xlen - 1;
  //  int ystart = ylen - 1;
  //  long carry = 0;
  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //    long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //    z[kdx] = (int)product;
  //    carry = product >>> 32;
  //  }
  //  z[xstart] = (int)carry;
  //

  movw(idx, ylen);      // idx = ylen;
  movw(kdx, zlen);      // kdx = xlen+ylen;
  mov(carry, zr);       // carry = 0;

  Label L_done;

  movw(xstart, xlen);
  subsw(xstart, xstart, 1);
  br(Assembler::MI, L_done);

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);

  Label L_second_loop;
  cbzw(kdx, L_second_loop);

  Label L_carry;
  subw(kdx, kdx, 1);
  cbzw(kdx, L_carry);

  strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
  lsr(carry, carry, 32);
  subw(kdx, kdx, 1);

  bind(L_carry);
  strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));

  // Second and third (nested) loops.
2473 // 2474 // for (int i = xstart-1; i >= 0; i--) { // Second loop 2475 // carry = 0; 2476 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop 2477 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + 2478 // (z[k] & LONG_MASK) + carry; 2479 // z[k] = (int)product; 2480 // carry = product >>> 32; 2481 // } 2482 // z[i] = (int)carry; 2483 // } 2484 // 2485 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi 2486 2487 const Register jdx = tmp1; 2488 2489 bind(L_second_loop); 2490 mov(carry, zr); // carry = 0; 2491 movw(jdx, ylen); // j = ystart+1 2492 2493 subsw(xstart, xstart, 1); // i = xstart-1; 2494 br(Assembler::MI, L_done); 2495 2496 str(z, Address(pre(sp, -4 * wordSize))); 2497 2498 Label L_last_x; 2499 lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j 2500 subsw(xstart, xstart, 1); // i = xstart-1; 2501 br(Assembler::MI, L_last_x); 2502 2503 lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt))); 2504 ldr(product_hi, Address(rscratch1)); 2505 ror(product_hi, product_hi, 32); // convert big-endian to little-endian 2506 2507 Label L_third_loop_prologue; 2508 bind(L_third_loop_prologue); 2509 2510 str(ylen, Address(sp, wordSize)); 2511 stp(x, xstart, Address(sp, 2 * wordSize)); 2512 multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, 2513 tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); 2514 ldp(z, ylen, Address(post(sp, 2 * wordSize))); 2515 ldp(x, xlen, Address(post(sp, 2 * wordSize))); // copy old xstart -> xlen 2516 2517 addw(tmp3, xlen, 1); 2518 strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); 2519 subsw(tmp3, tmp3, 1); 2520 br(Assembler::MI, L_done); 2521 2522 lsr(carry, carry, 32); 2523 strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt))); 2524 b(L_second_loop); 2525 2526 // Next infrequent code is moved outside loops. 2527 bind(L_last_x); 2528 ldrw(product_hi, Address(x, 0)); 2529 b(L_third_loop_prologue); 2530 2531 bind(L_done); 2532 } 2533 2534 /** 2535 * Emits code to update CRC-32 with a byte value according to constants in table 2536 * 2537 * @param [in,out]crc Register containing the crc. 2538 * @param [in]val Register containing the byte to fold into the CRC. 2539 * @param [in]table Register containing the table of crc constants. 2540 * 2541 * uint32_t crc; 2542 * val = crc_table[(val ^ crc) & 0xFF]; 2543 * crc = val ^ (crc >> 8); 2544 * 2545 */ 2546 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 2547 eor(val, val, crc); 2548 andr(val, val, 0xff); 2549 ldrw(val, Address(table, val, Address::lsl(2))); 2550 eor(crc, val, crc, Assembler::LSR, 8); 2551 } 2552 2553 /** 2554 * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 2555 * 2556 * @param [in,out]crc Register containing the crc. 2557 * @param [in]v Register containing the 32-bit to fold into the CRC. 2558 * @param [in]table0 Register containing table 0 of crc constants. 2559 * @param [in]table1 Register containing table 1 of crc constants. 2560 * @param [in]table2 Register containing table 2 of crc constants. 2561 * @param [in]table3 Register containing table 3 of crc constants. 2562 * 2563 * uint32_t crc; 2564 * v = crc ^ v 2565 * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] 2566 * 2567 */ 2568 void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, 2569 Register table0, Register table1, Register table2, Register table3, 2570 bool upper) { 2571 eor(v, crc, v, upper ? 
LSR:LSL, upper ? 32:0); 2572 uxtb(tmp, v); 2573 ldrw(crc, Address(table3, tmp, Address::lsl(2))); 2574 ubfx(tmp, v, 8, 8); 2575 ldrw(tmp, Address(table2, tmp, Address::lsl(2))); 2576 eor(crc, crc, tmp); 2577 ubfx(tmp, v, 16, 8); 2578 ldrw(tmp, Address(table1, tmp, Address::lsl(2))); 2579 eor(crc, crc, tmp); 2580 ubfx(tmp, v, 24, 8); 2581 ldrw(tmp, Address(table0, tmp, Address::lsl(2))); 2582 eor(crc, crc, tmp); 2583 } 2584 2585 /** 2586 * @param crc register containing existing CRC (32-bit) 2587 * @param buf register pointing to input byte buffer (byte*) 2588 * @param len register containing number of bytes 2589 * @param table register that will contain address of CRC table 2590 * @param tmp scratch register 2591 */ 2592 void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, 2593 Register table0, Register table1, Register table2, Register table3, 2594 Register tmp, Register tmp2, Register tmp3) { 2595 Label L_by16, L_by16_loop, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit; 2596 unsigned long offset; 2597 2598 ornw(crc, zr, crc); 2599 2600 if (UseCRC32) { 2601 Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop; 2602 2603 subs(len, len, 64); 2604 br(Assembler::GE, CRC_by64_loop); 2605 adds(len, len, 64-4); 2606 br(Assembler::GE, CRC_by4_loop); 2607 adds(len, len, 4); 2608 br(Assembler::GT, CRC_by1_loop); 2609 b(L_exit); 2610 2611 BIND(CRC_by4_loop); 2612 ldrw(tmp, Address(post(buf, 4))); 2613 subs(len, len, 4); 2614 crc32w(crc, crc, tmp); 2615 br(Assembler::GE, CRC_by4_loop); 2616 adds(len, len, 4); 2617 br(Assembler::LE, L_exit); 2618 BIND(CRC_by1_loop); 2619 ldrb(tmp, Address(post(buf, 1))); 2620 subs(len, len, 1); 2621 crc32b(crc, crc, tmp); 2622 br(Assembler::GT, CRC_by1_loop); 2623 b(L_exit); 2624 2625 align(CodeEntryAlignment); 2626 BIND(CRC_by64_loop); 2627 subs(len, len, 64); 2628 ldp(tmp, tmp3, Address(post(buf, 16))); 2629 crc32x(crc, crc, tmp); 2630 crc32x(crc, crc, tmp3); 2631 ldp(tmp, tmp3, Address(post(buf, 16))); 2632 crc32x(crc, crc, tmp); 2633 crc32x(crc, crc, tmp3); 2634 ldp(tmp, tmp3, Address(post(buf, 16))); 2635 crc32x(crc, crc, tmp); 2636 crc32x(crc, crc, tmp3); 2637 ldp(tmp, tmp3, Address(post(buf, 16))); 2638 crc32x(crc, crc, tmp); 2639 crc32x(crc, crc, tmp3); 2640 br(Assembler::GE, CRC_by64_loop); 2641 adds(len, len, 64-4); 2642 br(Assembler::GE, CRC_by4_loop); 2643 adds(len, len, 4); 2644 br(Assembler::GT, CRC_by1_loop); 2645 BIND(L_exit); 2646 ornw(crc, zr, crc); 2647 return; 2648 } 2649 2650 adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset); 2651 if (offset) add(table0, table0, offset); 2652 add(table1, table0, 1*256*sizeof(juint)); 2653 add(table2, table0, 2*256*sizeof(juint)); 2654 add(table3, table0, 3*256*sizeof(juint)); 2655 2656 if (UseNeon) { 2657 cmp(len, 64); 2658 br(Assembler::LT, L_by16); 2659 eor(v16, T16B, v16, v16); 2660 2661 Label L_fold; 2662 2663 add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants 2664 2665 ld1(v0, v1, T2D, post(buf, 32)); 2666 ld1r(v4, T2D, post(tmp, 8)); 2667 ld1r(v5, T2D, post(tmp, 8)); 2668 ld1r(v6, T2D, post(tmp, 8)); 2669 ld1r(v7, T2D, post(tmp, 8)); 2670 mov(v16, T4S, 0, crc); 2671 2672 eor(v0, T16B, v0, v16); 2673 sub(len, len, 64); 2674 2675 BIND(L_fold); 2676 pmull(v22, T8H, v0, v5, T8B); 2677 pmull(v20, T8H, v0, v7, T8B); 2678 pmull(v23, T8H, v0, v4, T8B); 2679 pmull(v21, T8H, v0, v6, T8B); 2680 2681 pmull2(v18, T8H, v0, v5, T16B); 2682 pmull2(v16, T8H, v0, v7, T16B); 2683 pmull2(v19, T8H, v0, v4, T16B); 2684 pmull2(v17, T8H, v0, v6, T16B); 2685 2686 uzp1(v24, v20, 
v22, T8H); 2687 uzp2(v25, v20, v22, T8H); 2688 eor(v20, T16B, v24, v25); 2689 2690 uzp1(v26, v16, v18, T8H); 2691 uzp2(v27, v16, v18, T8H); 2692 eor(v16, T16B, v26, v27); 2693 2694 ushll2(v22, T4S, v20, T8H, 8); 2695 ushll(v20, T4S, v20, T4H, 8); 2696 2697 ushll2(v18, T4S, v16, T8H, 8); 2698 ushll(v16, T4S, v16, T4H, 8); 2699 2700 eor(v22, T16B, v23, v22); 2701 eor(v18, T16B, v19, v18); 2702 eor(v20, T16B, v21, v20); 2703 eor(v16, T16B, v17, v16); 2704 2705 uzp1(v17, v16, v20, T2D); 2706 uzp2(v21, v16, v20, T2D); 2707 eor(v17, T16B, v17, v21); 2708 2709 ushll2(v20, T2D, v17, T4S, 16); 2710 ushll(v16, T2D, v17, T2S, 16); 2711 2712 eor(v20, T16B, v20, v22); 2713 eor(v16, T16B, v16, v18); 2714 2715 uzp1(v17, v20, v16, T2D); 2716 uzp2(v21, v20, v16, T2D); 2717 eor(v28, T16B, v17, v21); 2718 2719 pmull(v22, T8H, v1, v5, T8B); 2720 pmull(v20, T8H, v1, v7, T8B); 2721 pmull(v23, T8H, v1, v4, T8B); 2722 pmull(v21, T8H, v1, v6, T8B); 2723 2724 pmull2(v18, T8H, v1, v5, T16B); 2725 pmull2(v16, T8H, v1, v7, T16B); 2726 pmull2(v19, T8H, v1, v4, T16B); 2727 pmull2(v17, T8H, v1, v6, T16B); 2728 2729 ld1(v0, v1, T2D, post(buf, 32)); 2730 2731 uzp1(v24, v20, v22, T8H); 2732 uzp2(v25, v20, v22, T8H); 2733 eor(v20, T16B, v24, v25); 2734 2735 uzp1(v26, v16, v18, T8H); 2736 uzp2(v27, v16, v18, T8H); 2737 eor(v16, T16B, v26, v27); 2738 2739 ushll2(v22, T4S, v20, T8H, 8); 2740 ushll(v20, T4S, v20, T4H, 8); 2741 2742 ushll2(v18, T4S, v16, T8H, 8); 2743 ushll(v16, T4S, v16, T4H, 8); 2744 2745 eor(v22, T16B, v23, v22); 2746 eor(v18, T16B, v19, v18); 2747 eor(v20, T16B, v21, v20); 2748 eor(v16, T16B, v17, v16); 2749 2750 uzp1(v17, v16, v20, T2D); 2751 uzp2(v21, v16, v20, T2D); 2752 eor(v16, T16B, v17, v21); 2753 2754 ushll2(v20, T2D, v16, T4S, 16); 2755 ushll(v16, T2D, v16, T2S, 16); 2756 2757 eor(v20, T16B, v22, v20); 2758 eor(v16, T16B, v16, v18); 2759 2760 uzp1(v17, v20, v16, T2D); 2761 uzp2(v21, v20, v16, T2D); 2762 eor(v20, T16B, v17, v21); 2763 2764 shl(v16, T2D, v28, 1); 2765 shl(v17, T2D, v20, 1); 2766 2767 eor(v0, T16B, v0, v16); 2768 eor(v1, T16B, v1, v17); 2769 2770 subs(len, len, 32); 2771 br(Assembler::GE, L_fold); 2772 2773 mov(crc, 0); 2774 mov(tmp, v0, T1D, 0); 2775 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); 2776 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); 2777 mov(tmp, v0, T1D, 1); 2778 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); 2779 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); 2780 mov(tmp, v1, T1D, 0); 2781 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); 2782 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); 2783 mov(tmp, v1, T1D, 1); 2784 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); 2785 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); 2786 2787 add(len, len, 32); 2788 } 2789 2790 BIND(L_by16); 2791 subs(len, len, 16); 2792 br(Assembler::GE, L_by16_loop); 2793 adds(len, len, 16-4); 2794 br(Assembler::GE, L_by4_loop); 2795 adds(len, len, 4); 2796 br(Assembler::GT, L_by1_loop); 2797 b(L_exit); 2798 2799 BIND(L_by4_loop); 2800 ldrw(tmp, Address(post(buf, 4))); 2801 update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3); 2802 subs(len, len, 4); 2803 br(Assembler::GE, L_by4_loop); 2804 adds(len, len, 4); 2805 br(Assembler::LE, L_exit); 2806 BIND(L_by1_loop); 2807 subs(len, len, 1); 2808 ldrb(tmp, Address(post(buf, 1))); 2809 update_byte_crc32(crc, tmp, table0); 2810 
br(Assembler::GT, L_by1_loop);
    b(L_exit);

  align(CodeEntryAlignment);
  BIND(L_by16_loop);
    subs(len, len, 16);
    ldp(tmp, tmp3, Address(post(buf, 16)));
    update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
    update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
    update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, false);
    update_word_crc32(crc, tmp3, tmp2, table0, table1, table2, table3, true);
    br(Assembler::GE, L_by16_loop);
    adds(len, len, 16-4);
    br(Assembler::GE, L_by4_loop);
    adds(len, len, 4);
    br(Assembler::GT, L_by1_loop);
  BIND(L_exit);
    ornw(crc, zr, crc);
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  unsigned long offset;
  _masm->adrp(rscratch1, ExternalAddress((address)flag_addr), offset);
  _masm->ldrb(rscratch1, Address(rscratch1, offset));
  _masm->cbzw(rscratch1, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}

void MacroAssembler::cmpptr(Register src1, Address src2) {
  unsigned long offset;
  adrp(rscratch1, src2, offset);
  ldr(rscratch1, Address(rscratch1, offset));
  cmp(src1, rscratch1);
}

void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  lsr(obj, obj, CardTableModRefBS::card_shift);
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated.
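  // Worked example with made-up numbers (assuming the usual 512-byte cards,
  // i.e. card_shift == 9): for an oop originally at 0x0000000700001234, obj
  // now holds the card index 0x3800009 left by store_check_part_1's lsr;
  // adding byte_map_base below yields the address of that card's byte, which
  // is then zeroed (marked dirty) with a releasing byte store.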
intptr_t disp = (intptr_t) ct->byte_map_base;
  mov(rscratch1, disp);
  add(rscratch1, rscratch1, obj);
  mov(obj, zr);
  stlrb(obj, rscratch1);
}

void MacroAssembler::load_klass(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else {
    ldr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  }
}

void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
  if (UseCompressedClassPointers) {
    ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_klass_base() == NULL) {
      cmp(trial_klass, tmp, LSL, Universe::narrow_klass_shift());
      return;
    } else if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0
               && Universe::narrow_klass_shift() == 0) {
      // Only the bottom 32 bits matter
      cmpw(trial_klass, tmp);
      return;
    }
    decode_klass_not_null(tmp);
  } else {
    ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
  }
  cmp(trial_klass, tmp);
}

void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ldr(dst, Address(dst, Klass::prototype_header_offset()));
}

void MacroAssembler::store_klass(Register dst, Register src) {
  // FIXME: Should this be a store release?  concurrent gcs assume
  // klass length is valid if klass field is not null.
  if (UseCompressedClassPointers) {
    encode_klass_not_null(src);
    strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
  } else {
    str(src, Address(dst, oopDesc::klass_offset_in_bytes()));
  }
}

void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    // Store to klass gap in destination
    strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
  }
}

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register d, Register s) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(s, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      lsr(d, s, LogMinObjAlignmentInBytes);
    } else {
      mov(d, s);
    }
  } else {
    subs(d, s, rheapbase);
    csel(d, d, zr, Assembler::HS);
    lsr(d, d, LogMinObjAlignmentInBytes);

    /* Old algorithm: is this any worse?
2956 Label nonnull; 2957 cbnz(r, nonnull); 2958 sub(r, r, rheapbase); 2959 bind(nonnull); 2960 lsr(r, r, LogMinObjAlignmentInBytes); 2961 */ 2962 } 2963 } 2964 2965 void MacroAssembler::encode_heap_oop_not_null(Register r) { 2966 #ifdef ASSERT 2967 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 2968 if (CheckCompressedOops) { 2969 Label ok; 2970 cbnz(r, ok); 2971 stop("null oop passed to encode_heap_oop_not_null"); 2972 bind(ok); 2973 } 2974 #endif 2975 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 2976 if (Universe::narrow_oop_base() != NULL) { 2977 sub(r, r, rheapbase); 2978 } 2979 if (Universe::narrow_oop_shift() != 0) { 2980 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 2981 lsr(r, r, LogMinObjAlignmentInBytes); 2982 } 2983 } 2984 2985 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 2986 #ifdef ASSERT 2987 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 2988 if (CheckCompressedOops) { 2989 Label ok; 2990 cbnz(src, ok); 2991 stop("null oop passed to encode_heap_oop_not_null2"); 2992 bind(ok); 2993 } 2994 #endif 2995 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 2996 2997 Register data = src; 2998 if (Universe::narrow_oop_base() != NULL) { 2999 sub(dst, src, rheapbase); 3000 data = dst; 3001 } 3002 if (Universe::narrow_oop_shift() != 0) { 3003 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 3004 lsr(dst, data, LogMinObjAlignmentInBytes); 3005 data = dst; 3006 } 3007 if (data == src) 3008 mov(dst, src); 3009 } 3010 3011 void MacroAssembler::decode_heap_oop(Register d, Register s) { 3012 #ifdef ASSERT 3013 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 3014 #endif 3015 if (Universe::narrow_oop_base() == NULL) { 3016 if (Universe::narrow_oop_shift() != 0 || d != s) { 3017 lsl(d, s, Universe::narrow_oop_shift()); 3018 } 3019 } else { 3020 Label done; 3021 if (d != s) 3022 mov(d, s); 3023 cbz(s, done); 3024 add(d, rheapbase, s, Assembler::LSL, LogMinObjAlignmentInBytes); 3025 bind(done); 3026 } 3027 verify_oop(d, "broken oop in decode_heap_oop"); 3028 } 3029 3030 void MacroAssembler::decode_heap_oop_not_null(Register r) { 3031 assert (UseCompressedOops, "should only be used for compressed headers"); 3032 assert (Universe::heap() != NULL, "java heap should be initialized"); 3033 // Cannot assert, unverified entry point counts instructions (see .ad file) 3034 // vtableStubs also counts instructions in pd_code_size_limit. 3035 // Also do not verify_oop as this is called by verify_oop. 3036 if (Universe::narrow_oop_shift() != 0) { 3037 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 3038 if (Universe::narrow_oop_base() != NULL) { 3039 add(r, rheapbase, r, Assembler::LSL, LogMinObjAlignmentInBytes); 3040 } else { 3041 add(r, zr, r, Assembler::LSL, LogMinObjAlignmentInBytes); 3042 } 3043 } else { 3044 assert (Universe::narrow_oop_base() == NULL, "sanity"); 3045 } 3046 } 3047 3048 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 3049 assert (UseCompressedOops, "should only be used for compressed headers"); 3050 assert (Universe::heap() != NULL, "java heap should be initialized"); 3051 // Cannot assert, unverified entry point counts instructions (see .ad file) 3052 // vtableStubs also counts instructions in pd_code_size_limit. 3053 // Also do not verify_oop as this is called by verify_oop. 
3054 if (Universe::narrow_oop_shift() != 0) { 3055 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 3056 if (Universe::narrow_oop_base() != NULL) { 3057 add(dst, rheapbase, src, Assembler::LSL, LogMinObjAlignmentInBytes); 3058 } else { 3059 add(dst, zr, src, Assembler::LSL, LogMinObjAlignmentInBytes); 3060 } 3061 } else { 3062 assert (Universe::narrow_oop_base() == NULL, "sanity"); 3063 if (dst != src) { 3064 mov(dst, src); 3065 } 3066 } 3067 } 3068 3069 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 3070 if (Universe::narrow_klass_base() == NULL) { 3071 if (Universe::narrow_klass_shift() != 0) { 3072 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3073 lsr(dst, src, LogKlassAlignmentInBytes); 3074 } else { 3075 if (dst != src) mov(dst, src); 3076 } 3077 return; 3078 } 3079 3080 if (use_XOR_for_compressed_class_base) { 3081 if (Universe::narrow_klass_shift() != 0) { 3082 eor(dst, src, (uint64_t)Universe::narrow_klass_base()); 3083 lsr(dst, dst, LogKlassAlignmentInBytes); 3084 } else { 3085 eor(dst, src, (uint64_t)Universe::narrow_klass_base()); 3086 } 3087 return; 3088 } 3089 3090 if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 3091 && Universe::narrow_klass_shift() == 0) { 3092 movw(dst, src); 3093 return; 3094 } 3095 3096 #ifdef ASSERT 3097 verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); 3098 #endif 3099 3100 Register rbase = dst; 3101 if (dst == src) rbase = rheapbase; 3102 mov(rbase, (uint64_t)Universe::narrow_klass_base()); 3103 sub(dst, src, rbase); 3104 if (Universe::narrow_klass_shift() != 0) { 3105 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3106 lsr(dst, dst, LogKlassAlignmentInBytes); 3107 } 3108 if (dst == src) reinit_heapbase(); 3109 } 3110 3111 void MacroAssembler::encode_klass_not_null(Register r) { 3112 encode_klass_not_null(r, r); 3113 } 3114 3115 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 3116 Register rbase = dst; 3117 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3118 3119 if (Universe::narrow_klass_base() == NULL) { 3120 if (Universe::narrow_klass_shift() != 0) { 3121 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3122 lsl(dst, src, LogKlassAlignmentInBytes); 3123 } else { 3124 if (dst != src) mov(dst, src); 3125 } 3126 return; 3127 } 3128 3129 if (use_XOR_for_compressed_class_base) { 3130 if (Universe::narrow_klass_shift() != 0) { 3131 lsl(dst, src, LogKlassAlignmentInBytes); 3132 eor(dst, dst, (uint64_t)Universe::narrow_klass_base()); 3133 } else { 3134 eor(dst, src, (uint64_t)Universe::narrow_klass_base()); 3135 } 3136 return; 3137 } 3138 3139 if (((uint64_t)Universe::narrow_klass_base() & 0xffffffff) == 0 3140 && Universe::narrow_klass_shift() == 0) { 3141 if (dst != src) 3142 movw(dst, src); 3143 movk(dst, (uint64_t)Universe::narrow_klass_base() >> 32, 32); 3144 return; 3145 } 3146 3147 // Cannot assert, unverified entry point counts instructions (see .ad file) 3148 // vtableStubs also counts instructions in pd_code_size_limit. 3149 // Also do not verify_oop as this is called by verify_oop. 
3150 if (dst == src) rbase = rheapbase; 3151 mov(rbase, (uint64_t)Universe::narrow_klass_base()); 3152 if (Universe::narrow_klass_shift() != 0) { 3153 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 3154 add(dst, rbase, src, Assembler::LSL, LogKlassAlignmentInBytes); 3155 } else { 3156 add(dst, rbase, src); 3157 } 3158 if (dst == src) reinit_heapbase(); 3159 } 3160 3161 void MacroAssembler::decode_klass_not_null(Register r) { 3162 decode_klass_not_null(r, r); 3163 } 3164 3165 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 3166 assert (UseCompressedOops, "should only be used for compressed oops"); 3167 assert (Universe::heap() != NULL, "java heap should be initialized"); 3168 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3169 3170 int oop_index = oop_recorder()->find_index(obj); 3171 assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); 3172 3173 InstructionMark im(this); 3174 RelocationHolder rspec = oop_Relocation::spec(oop_index); 3175 code_section()->relocate(inst_mark(), rspec); 3176 movz(dst, 0xDEAD, 16); 3177 movk(dst, 0xBEEF); 3178 } 3179 3180 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 3181 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 3182 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 3183 int index = oop_recorder()->find_index(k); 3184 assert(! Universe::heap()->is_in_reserved(k), "should not be an oop"); 3185 3186 InstructionMark im(this); 3187 RelocationHolder rspec = metadata_Relocation::spec(index); 3188 code_section()->relocate(inst_mark(), rspec); 3189 narrowKlass nk = Klass::encode_klass(k); 3190 movz(dst, (nk >> 16), 16); 3191 movk(dst, nk & 0xffff); 3192 } 3193 3194 void MacroAssembler::load_heap_oop(Register dst, Address src) 3195 { 3196 if (UseCompressedOops) { 3197 ldrw(dst, src); 3198 decode_heap_oop(dst); 3199 } else { 3200 ldr(dst, src); 3201 } 3202 } 3203 3204 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) 3205 { 3206 if (UseCompressedOops) { 3207 ldrw(dst, src); 3208 decode_heap_oop_not_null(dst); 3209 } else { 3210 ldr(dst, src); 3211 } 3212 } 3213 3214 void MacroAssembler::store_heap_oop(Address dst, Register src) { 3215 if (UseCompressedOops) { 3216 assert(!dst.uses(src), "not enough registers"); 3217 encode_heap_oop(src); 3218 strw(src, dst); 3219 } else 3220 str(src, dst); 3221 } 3222 3223 // Used for storing NULLs. 3224 void MacroAssembler::store_heap_oop_null(Address dst) { 3225 if (UseCompressedOops) { 3226 strw(zr, dst); 3227 } else 3228 str(zr, dst); 3229 } 3230 3231 #if INCLUDE_ALL_GCS 3232 void MacroAssembler::g1_write_barrier_pre(Register obj, 3233 Register pre_val, 3234 Register thread, 3235 Register tmp, 3236 bool tosca_live, 3237 bool expand_call) { 3238 // If expand_call is true then we expand the call_VM_leaf macro 3239 // directly to skip generating the check by 3240 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
#ifdef _LP64
  assert(thread == rthread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg)
    assert_different_registers(obj, pre_val, tmp);

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    ldrw(tmp, in_progress);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    ldrb(tmp, in_progress);
  }
  cbzw(tmp, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cbz(pre_val, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  ldr(tmp, index);                      // tmp := *index_adr
  cbz(tmp, runtime);                    // tmp == 0?
                                        // If yes, goto runtime

  sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
  str(tmp, index);                      // *index_adr := tmp
  ldr(rscratch1, buffer);
  add(tmp, tmp, rscratch1);             // tmp := tmp + *buffer_adr

  // Record the previous value
  str(pre_val, Address(tmp, 0));
  b(done);

  bind(runtime);
  // save the live input values
  push(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);

  bind(done);
}
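// Fast-path summary of the pre-barrier above in C-like pseudocode
// (illustrative only; field and helper names here are descriptive, not the
// real declarations):
//
//   if (thread->satb_mark_queue.active) {
//     oop pre = (obj != NULL) ? *obj : pre_val;
//     if (pre != NULL) {
//       if (queue.index != 0) {             // room left in the local buffer
//         queue.index -= wordSize;
//         queue.buf[queue.index] = pre;     // log the previous value
//       } else {
//         runtime_call(SharedRuntime::g1_wb_pre, pre, thread);
//       }
//     }
//   }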
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == rthread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?

  eor(tmp, store_addr, new_val);
  lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
  cbz(tmp, done);

  // crosses regions, storing NULL?

  cbz(new_val, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  const Register card_addr = tmp;

  lsr(card_addr, store_addr, CardTableModRefBS::card_shift);

  unsigned long offset;
  adrp(tmp2, cardtable, offset);

  // get the address of the card
  add(card_addr, card_addr, tmp2);
  ldrb(tmp2, Address(card_addr, offset));
  cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
  br(Assembler::EQ, done);

  assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0");

  membar(Assembler::StoreLoad);

  ldrb(tmp2, Address(card_addr, offset));
  cbzw(tmp2, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  strb(zr, Address(card_addr, offset));

  ldr(rscratch1, queue_index);
  cbz(rscratch1, runtime);
  sub(rscratch1, rscratch1, wordSize);
  str(rscratch1, queue_index);

  ldr(tmp2, buffer);
  str(card_addr, Address(tmp2, rscratch1));
  b(done);

  bind(runtime);
  // save the live input values
  push(store_addr->bit(true) | new_val->bit(true), sp);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(store_addr->bit(true) | new_val->bit(true), sp);

  bind(done);
}

#endif // INCLUDE_ALL_GCS

Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->allocate_metadata_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return Address((address)obj, rspec);
}

// Move an oop into a register.  immediate is true if we want
// immediate instructions, i.e. we are not going to patch this
// instruction while the code is being executed by another thread.

#endif // INCLUDE_ALL_GCS

Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->allocate_metadata_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return Address((address)obj, rspec);
}

// Move an oop into a register.  immediate is true if we want
// immediate instructions, i.e. we are not going to patch this
// instruction while the code is being executed by another thread.  In
// that case we can use move immediates rather than the constant pool.
void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
  int oop_index;
  if (obj == NULL) {
    oop_index = oop_recorder()->allocate_oop_index(obj);
  } else {
    oop_index = oop_recorder()->find_index(obj);
    assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
  }
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  if (! immediate) {
    address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
    ldr_constant(dst, Address(dummy, rspec));
  } else
    mov(dst, Address((address)obj, rspec));
}

// Move a metadata address into a register.
void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  int oop_index;
  if (obj == NULL) {
    oop_index = oop_recorder()->allocate_metadata_index(obj);
  } else {
    oop_index = oop_recorder()->find_index(obj);
  }
  RelocationHolder rspec = metadata_Relocation::spec(oop_index);
  mov(dst, Address((address)obj, rspec));
}

Address MacroAssembler::constant_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop");
  int oop_index = oop_recorder()->find_index(obj);
  return Address((address)obj, oop_Relocation::spec(oop_index));
}

// Defines obj, preserves var_size_in_bytes; okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t2);
  assert_different_registers(obj, var_size_in_bytes);
  Register end = t2;

  // verify_tlab();

  ldr(obj, Address(rthread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes));
  }
  ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset()));
  cmp(end, rscratch1);
  br(Assembler::HI, slow_case);

  // update the tlab top pointer
  str(end, Address(rthread, JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    sub(var_size_in_bytes, var_size_in_bytes, obj);
  }
  // verify_tlab();
}

// Preserves r19 and r3.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = r0;
  Register t1  = r2;
  Register t2  = r4;
  assert_different_registers(top, rthread, t1, t2, /* preserve: */ r19, r3);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    b(slow_case);
  }

  ldr(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
  ldr(t1,  Address(rthread, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  sub(t1, t1, top);
  lsr(t1, t1, LogHeapWordSize);

  // Retain the tlab and allocate the object in the shared space if
  // the amount free in the tlab is too large to discard.
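
  // The retain-or-discard decision below is, in outline (illustrative
  // pseudocode; the real policy lives in ThreadLocalAllocBuffer):
  //
  //   if (free_words > refill_waste_limit) {
  //     refill_waste_limit += refill_waste_limit_increment;
  //     goto try_eden;          // keep the tlab, allocate in eden
  //   }
  //   discard_tlab();           // fill the remainder with a dummy int[],
  //                             // then refill from eden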

  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  cmp(t1, rscratch1);
  br(Assembler::LE, discard_tlab);

  // Retain
  mov(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  add(rscratch1, rscratch1, t2);
  str(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));

  if (TLABStats) {
    // increment number of slow_allocations
    addmw(Address(rthread, in_bytes(JavaThread::tlab_slow_allocations_offset())),
          1, rscratch1);
  }
  b(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addmw(Address(rthread, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1,
          rscratch1);
    // accumulate wastage -- t1 is amount free in tlab
    addmw(Address(rthread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1,
          rscratch1);
  }

  // If the tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with an array object.
  cbz(top, do_refill);

  // set up the mark word
  mov(rscratch1, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  str(rscratch1, Address(top, oopDesc::mark_offset_in_bytes()));
  // set the length to the remaining space
  sub(t1, t1, typeArrayOopDesc::header_size(T_INT));
  add(t1, t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  lsl(t1, t1, log2_intptr(HeapWordSize/sizeof(jint)));
  strw(t1, Address(top, arrayOopDesc::length_offset_in_bytes()));
  // set klass to intArrayKlass
  {
    unsigned long offset;
    // dubious reloc: why not an oop reloc?
    adrp(rscratch1, ExternalAddress((address)Universe::intArrayKlassObj_addr()),
         offset);
    ldr(t1, Address(rscratch1, offset));
  }
  // Store the klass last: concurrent GCs assume the length is valid if
  // the klass field is not null.
  store_klass(top, t1);

  mov(t1, top);
  ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
  sub(t1, t1, rscratch1);
  incr_allocated_bytes(rthread, t1, 0, rscratch1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ldr(t1, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
  lsl(t1, t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);
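
  // If eden_allocate succeeds, the stores below (after the debug check)
  // re-establish the tlab over the new block; in outline (illustrative):
  //
  //   tlab_start = top;
  //   tlab_top   = top;
  //   tlab_end   = top + size_in_bytes - alignment_reserve_in_bytes;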

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = r4;
    assert_different_registers(tsize, rthread, t1);
    str(tsize, Address(pre(sp, -16)));
    ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
    lsl(tsize, tsize, LogHeapWordSize);
    cmp(t1, tsize);
    br(Assembler::EQ, ok);
    STOP("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    ldr(tsize, Address(post(sp, 16)));
  }
#endif
  str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
  str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
  add(top, top, t1);
  sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
  verify_tlab();
  b(retry);

  return rthread; // for use by caller
}

// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    b(slow_case);
  } else {
    Register end = t1;
    Register heap_end = rscratch2;
    Label retry;
    bind(retry);
    {
      unsigned long offset;
      adrp(rscratch1, ExternalAddress((address) Universe::heap()->end_addr()), offset);
      ldr(heap_end, Address(rscratch1, offset));
    }

    ExternalAddress heap_top((address) Universe::heap()->top_addr());

    // Get the current top of the heap
    {
      unsigned long offset;
      adrp(rscratch1, heap_top, offset);
      // Use add() here after ADRP, rather than lea().
      // lea() does not generate anything if its offset is zero.
      // However, relocs expect to find either an ADD or a load/store
      // insn after an ADRP.  add() always generates an ADD insn, even
      // for add(Rn, Rn, 0).
      add(rscratch1, rscratch1, offset);
      ldaxr(obj, rscratch1);
    }

    // Adjust it by the size of our new object
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes));
    }

    // if end < obj then we wrapped around high memory
    cmp(end, obj);
    br(Assembler::LO, slow_case);

    cmp(end, heap_end);
    br(Assembler::HI, slow_case);

    // If heap_top hasn't been changed by some other thread, update it.
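    // Together with the ldaxr above, the stlxr below forms an atomic
    // update loop on heap_top; in effect (illustrative C, not the
    // emitted code):
    //
    //   do {
    //     obj = load_exclusive(heap_top);
    //     end = obj + size;
    //     if (end < obj || end > heap_end) goto slow_case;
    //   } while (!store_exclusive(heap_top, end));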
    stlxr(rscratch2, end, rscratch1);
    cbnzw(rscratch2, retry);
  }
}

void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;

    stp(rscratch2, rscratch1, Address(pre(sp, -16)));

    ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
    ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
    cmp(rscratch2, rscratch1);
    br(Assembler::HS, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
    ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
    cmp(rscratch2, rscratch1);
    br(Assembler::HS, ok);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    ldp(rscratch2, rscratch1, Address(post(sp, 16)));
  }
#endif
}

// Writes to successive stack pages until the given offset is reached,
// to check for stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  assert_different_registers(tmp, size, rscratch1);
  mov(tmp, sp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because a large size can bang beyond the
  // yellow and red zones.
  Label loop;
  mov(rscratch1, os::vm_page_size());
  bind(loop);
  lea(tmp, Address(tmp, -os::vm_page_size()));
  subsw(size, size, rscratch1);
  str(size, Address(tmp));
  br(Assembler::GT, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i < StackShadowPages-1; i++) {
    // This could be any sized move, but since it can serve as a
    // debugging crumb the bigger the better.
    lea(tmp, Address(tmp, -os::vm_page_size()));
    str(size, Address(tmp));
  }
}


address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
  unsigned long off;
  adrp(r, Address(page, rtype), off);
  InstructionMark im(this);
  code_section()->relocate(inst_mark(), rtype);
  ldrw(zr, Address(r, off));
  return inst_mark();
}

address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
  InstructionMark im(this);
  code_section()->relocate(inst_mark(), rtype);
  ldrw(zr, Address(r, 0));
  return inst_mark();
}

void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
  relocInfo::relocType rtype = dest.rspec().reloc()->type();
  if (uabs(pc() - dest.target()) >= (1LL << 32)) {
    guarantee(rtype == relocInfo::none
              || rtype == relocInfo::external_word_type
              || rtype == relocInfo::poll_type
              || rtype == relocInfo::poll_return_type,
              "can only use a fixed address with an ADRP");
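    //
    // An ADRP only reaches +/-4GB from the PC: it computes
    //
    //   reg1 = (PC & ~0xfff) + (simm21 << 12)
    //
    // with the low 12 bits of the target supplied by a following
    // add/ldr/str using the returned byte_offset.  Targets further away
    // must be materialised as a full 64-bit constant instead.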
    // Out of range.  This doesn't happen very often, but we have to
    // handle it.
    mov(reg1, dest);
    byte_offset = 0;
  } else {
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), dest.rspec());
    byte_offset = (uint64_t)dest.target() & 0xfff;
    _adrp(reg1, dest.target());
  }
}

bool MacroAssembler::use_acq_rel_for_volatile_fields() {
#ifdef PRODUCT
  return false;
#else
  return UseAcqRelForVolatileFields;
#endif
}

void MacroAssembler::build_frame(int framesize) {
  if (framesize == 0) {
    // Is this even possible?
    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    sub(sp, sp, framesize);
    stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
  } else {
    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
    if (framesize < ((1 << 12) + 2 * wordSize))
      sub(sp, sp, framesize - 2 * wordSize);
    else {
      mov(rscratch1, framesize - 2 * wordSize);
      sub(sp, sp, rscratch1);
    }
  }
}

void MacroAssembler::remove_frame(int framesize) {
  if (framesize == 0) {
    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
    add(sp, sp, framesize);
  } else {
    if (framesize < ((1 << 12) + 2 * wordSize))
      add(sp, sp, framesize - 2 * wordSize);
    else {
      mov(rscratch1, framesize - 2 * wordSize);
      add(sp, sp, rscratch1);
    }
    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
  }
}

// Search for str1 in str2 and return the index or -1
void MacroAssembler::string_indexof(Register str2, Register str1,
                                    Register cnt2, Register cnt1,
                                    Register tmp1, Register tmp2,
                                    Register tmp3, Register tmp4,
                                    int icnt1, Register result) {
  Label BM, LINEARSEARCH, DONE, NOMATCH, MATCH;

  Register ch1 = rscratch1;
  Register ch2 = rscratch2;
  Register cnt1tmp = tmp1;
  Register cnt2tmp = tmp2;
  Register cnt1_neg = cnt1;
  Register cnt2_neg = cnt2;
  Register result_tmp = tmp4;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;

  // We have two strings, a source string in str2, cnt2 and a pattern string
  // in str1, cnt1.  Find the first occurrence of the pattern in the source,
  // or return -1.

  // For a larger pattern and source we use a simplified Boyer Moore
  // algorithm.  With a small pattern and source we use a linear scan.

  if (icnt1 == -1) {
    cmp(cnt1, 256);             // Use linear scan if cnt1 < 8 || cnt1 >= 256.
    ccmp(cnt1, 8, 0b0000, LO);  // Can't handle skip >= 256 because we use
    br(LO, LINEARSEARCH);       // a byte array.
    cmp(cnt1, cnt2, LSR, 2);    // Source must be 4 * pattern for BM
    br(HS, LINEARSEARCH);
  }
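
  // When the pattern length is not known at compile time (icnt1 == -1)
  // the dispatch above is, in outline (illustrative):
  //
  //   if (cnt1 < 8 || cnt1 >= 256 || cnt1 >= cnt2 / 4)
  //     goto LINEARSEARCH;  // pattern too small, too big, or too large
  //                         // a fraction of the source
  //   // otherwise fall through to Boyer-Moore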

  // The Boyer-Moore algorithm is based on the description here:
  //
  // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
  //
  // This describes an algorithm with two shift rules: the 'Bad Character'
  // rule and the 'Good Suffix' rule.
  //
  // These rules are essentially heuristics for how far we can shift the
  // pattern along the search string.
  //
  // The implementation here uses the 'Bad Character' rule only because of
  // the complexity of initialisation for the 'Good Suffix' rule.
  //
  // This is also known as the Boyer-Moore-Horspool algorithm:
  //
  // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
  //
  // #define ASIZE 128
  //
  // int bm(unsigned char *x, int m, unsigned char *y, int n) {
  //   int i, j;
  //   unsigned c;
  //   unsigned char bc[ASIZE];
  //
  //   /* Preprocessing */
  //   for (i = 0; i < ASIZE; ++i)
  //     bc[i] = 0;
  //   for (i = 0; i < m - 1; ) {
  //     c = x[i];
  //     ++i;
  //     if (c < ASIZE) bc[c] = i;
  //   }
  //
  //   /* Searching */
  //   j = 0;
  //   while (j <= n - m) {
  //     c = y[j + m - 1];          // last char of the current window
  //     if (x[m - 1] == c) {
  //       for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
  //       if (i < 0) return j;
  //     }
  //     if (c < ASIZE)
  //       j = j - bc[c] + m;
  //     else
  //       j += 1;                  // Advance by 1 only if char >= ASIZE
  //   }
  //   return -1;
  // }

  if (icnt1 == -1) {
    BIND(BM);

    Label ZLOOP, BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP;
    Label BMADV, BMMATCH, BMCHECKEND;

    Register cnt1end = tmp2;
    Register str2end = cnt2;
    Register skipch = tmp2;

    // Restrict ASIZE to 128 to reduce stack space/initialisation.
    // The presence of chars >= ASIZE in the target string does not affect
    // performance, but we must be careful not to initialise them in the
    // stack array.
    // The presence of chars >= ASIZE in the source string may adversely
    // affect performance, since we can only advance by one when we
    // encounter one.

    stp(zr, zr, pre(sp, -128));
    for (int i = 1; i < 8; i++)
      stp(zr, zr, Address(sp, i*16));

    mov(cnt1tmp, 0);
    sub(cnt1end, cnt1, 1);
    BIND(BCLOOP);
    ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
    cmp(ch1, 128);
    add(cnt1tmp, cnt1tmp, 1);
    br(HS, BCSKIP);
    strb(cnt1tmp, Address(sp, ch1));
    BIND(BCSKIP);
    cmp(cnt1tmp, cnt1end);
    br(LT, BCLOOP);

    mov(result_tmp, str2);

    sub(cnt2, cnt2, cnt1);
    add(str2end, str2, cnt2, LSL, 1);
    BIND(BMLOOPSTR2);
    sub(cnt1tmp, cnt1, 1);
    ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
    ldrh(skipch, Address(str2, cnt1tmp, Address::lsl(1)));
    cmp(ch1, skipch);
    br(NE, BMSKIP);
    subs(cnt1tmp, cnt1tmp, 1);
    br(LT, BMMATCH);
    BIND(BMLOOPSTR1);
    ldrh(ch1, Address(str1, cnt1tmp, Address::lsl(1)));
    ldrh(ch2, Address(str2, cnt1tmp, Address::lsl(1)));
    cmp(ch1, ch2);
    br(NE, BMSKIP);
    subs(cnt1tmp, cnt1tmp, 1);
    br(GE, BMLOOPSTR1);
    BIND(BMMATCH);
    sub(result_tmp, str2, result_tmp);
    lsr(result, result_tmp, 1);
    add(sp, sp, 128);
    b(DONE);
    BIND(BMADV);
    add(str2, str2, 2);
    b(BMCHECKEND);
    BIND(BMSKIP);
    cmp(skipch, 128);
    br(HS, BMADV);
    ldrb(ch2, Address(sp, skipch));
    add(str2, str2, cnt1, LSL, 1);
    sub(str2, str2, ch2, LSL, 1);
    BIND(BMCHECKEND);
    cmp(str2, str2end);
    br(LE, BMLOOPSTR2);
    add(sp, sp, 128);
    b(NOMATCH);
  }
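
  // The linear-scan variants below point str1/str2 at the last element
  // of each string and negate the remaining counts, so the flag-setting
  // 'adds' that steps the index also serves as the loop test.  In
  // outline (illustrative):
  //
  //   jchar* end2 = str2 + cnt2;       // str2 after the lea
  //   for (long i = -(long)cnt2; i <= 0; i++)
  //     if (matches_at(end2 + i)) ...  // candidate match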

  BIND(LINEARSEARCH);
  {
    Label DO1, DO2, DO3;

    Register str2tmp = tmp2;
    Register first = tmp3;

    if (icnt1 == -1)
    {
      Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT, LAST_WORD;

      cmp(cnt1, 4);
      br(LT, DOSHORT);

      sub(cnt2, cnt2, cnt1);
      sub(cnt1, cnt1, 4);
      mov(result_tmp, cnt2);

      lea(str1, Address(str1, cnt1, Address::uxtw(1)));
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt1_neg, zr, cnt1, LSL, 1);
      sub(cnt2_neg, zr, cnt2, LSL, 1);
      ldr(first, Address(str1, cnt1_neg));

      BIND(FIRST_LOOP);
      ldr(ch2, Address(str2, cnt2_neg));
      cmp(first, ch2);
      br(EQ, STR1_LOOP);
      BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, 2);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

      BIND(STR1_LOOP);
      adds(cnt1tmp, cnt1_neg, 8);
      add(cnt2tmp, cnt2_neg, 8);
      br(GE, LAST_WORD);

      BIND(STR1_NEXT);
      ldr(ch1, Address(str1, cnt1tmp));
      ldr(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      adds(cnt1tmp, cnt1tmp, 8);
      add(cnt2tmp, cnt2tmp, 8);
      br(LT, STR1_NEXT);

      BIND(LAST_WORD);
      ldr(ch1, Address(str1));
      sub(str2tmp, str2, cnt1_neg);         // adjust to corresponding
      ldr(ch2, Address(str2tmp, cnt2_neg)); // word in str2
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      b(MATCH);

      BIND(DOSHORT);
      cmp(cnt1, 2);
      br(LT, DO1);
      br(GT, DO3);
    }

    if (icnt1 == 4) {
      Label CH1_LOOP;

      ldr(ch1, str1);
      sub(cnt2, cnt2, 4);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt2_neg, zr, cnt2, LSL, 1);

      BIND(CH1_LOOP);
      ldr(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, 2);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if (icnt1 == -1 || icnt1 == 2) {
      Label CH1_LOOP;

      BIND(DO2);
      ldrw(ch1, str1);
      sub(cnt2, cnt2, 2);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt2_neg, zr, cnt2, LSL, 1);

      BIND(CH1_LOOP);
      ldrw(ch2, Address(str2, cnt2_neg));
      cmp(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, 2);
      br(LE, CH1_LOOP);
      b(NOMATCH);
    }

    if (icnt1 == -1 || icnt1 == 3) {
      Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;

      BIND(DO3);
      ldrw(first, str1);
      ldrh(ch1, Address(str1, 4));

      sub(cnt2, cnt2, 3);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt2_neg, zr, cnt2, LSL, 1);

      BIND(FIRST_LOOP);
      ldrw(ch2, Address(str2, cnt2_neg));
      cmpw(first, ch2);
      br(EQ, STR1_LOOP);
      BIND(STR2_NEXT);
      adds(cnt2_neg, cnt2_neg, 2);
      br(LE, FIRST_LOOP);
      b(NOMATCH);

      BIND(STR1_LOOP);
      add(cnt2tmp, cnt2_neg, 4);
      ldrh(ch2, Address(str2, cnt2tmp));
      cmp(ch1, ch2);
      br(NE, STR2_NEXT);
      b(MATCH);
    }
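
    // The single-char case below scans four chars (8 bytes) at a time
    // using a SWAR zero-detect on chunk ^ pattern; in outline
    // (illustrative):
    //
    //   uint64_t v = chunk ^ (ch * 0x0001000100010001);  // 0 where equal
    //   uint64_t t = (v - 0x0001000100010001)
    //                & ~(v | 0x7fff7fff7fff7fff);  // MSB set per zero half-word
    //   if (t) byte_offset_of_match = clz(byte_reverse(t)) >> 3;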

    if (icnt1 == -1 || icnt1 == 1) {
      Label CH1_LOOP, HAS_ZERO;
      Label DO1_SHORT, DO1_LOOP;

      BIND(DO1);
      ldrh(ch1, str1);
      cmp(cnt2, 4);
      br(LT, DO1_SHORT);

      orr(ch1, ch1, ch1, LSL, 16);
      orr(ch1, ch1, ch1, LSL, 32);

      sub(cnt2, cnt2, 4);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt2_neg, zr, cnt2, LSL, 1);

      mov(tmp3, 0x0001000100010001);
      BIND(CH1_LOOP);
      ldr(ch2, Address(str2, cnt2_neg));
      eor(ch2, ch1, ch2);
      sub(tmp1, ch2, tmp3);
      orr(tmp2, ch2, 0x7fff7fff7fff7fff);
      bics(tmp1, tmp1, tmp2);
      br(NE, HAS_ZERO);
      adds(cnt2_neg, cnt2_neg, 8);
      br(LT, CH1_LOOP);

      cmp(cnt2_neg, 8);
      mov(cnt2_neg, 0);
      br(LT, CH1_LOOP);
      b(NOMATCH);

      BIND(HAS_ZERO);
      rev(tmp1, tmp1);
      clz(tmp1, tmp1);
      add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
      b(MATCH);

      BIND(DO1_SHORT);
      mov(result_tmp, cnt2);
      lea(str2, Address(str2, cnt2, Address::uxtw(1)));
      sub(cnt2_neg, zr, cnt2, LSL, 1);
      BIND(DO1_LOOP);
      ldrh(ch2, Address(str2, cnt2_neg));
      cmpw(ch1, ch2);
      br(EQ, MATCH);
      adds(cnt2_neg, cnt2_neg, 2);
      br(LT, DO1_LOOP);
    }
  }
  BIND(NOMATCH);
  mov(result, -1);
  b(DONE);
  BIND(MATCH);
  add(result, result_tmp, cnt2_neg, ASR, 1);
  BIND(DONE);
}

// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    Register tmp1) {
  Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING,
      NEXT_WORD, DIFFERENCE;

  BLOCK_COMMENT("string_compare {");

  // Compute the minimum of the string lengths and save the difference.
  subsw(tmp1, cnt1, cnt2);
  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min

  // A very short string
  cmpw(cnt2, 4);
  br(Assembler::LT, SHORT_STRING);

  // Check if the strings start at the same location.
  cmp(str1, str2);
  br(Assembler::EQ, LENGTH_DIFF);

  // Compare longwords
  {
    subw(cnt2, cnt2, 4); // The last longword is a special case

    // Move both string pointers to the last longword of their
    // strings, negate the remaining count, and convert it to bytes.
    lea(str1, Address(str1, cnt2, Address::uxtw(1)));
    lea(str2, Address(str2, cnt2, Address::uxtw(1)));
    sub(cnt2, zr, cnt2, LSL, 1);

    // Loop, loading longwords and comparing them into rscratch2.
    bind(NEXT_WORD);
    ldr(result, Address(str1, cnt2));
    ldr(cnt1, Address(str2, cnt2));
    adds(cnt2, cnt2, wordSize);
    eor(rscratch2, result, cnt1);
    cbnz(rscratch2, DIFFERENCE);
    br(Assembler::LT, NEXT_WORD);

    // Last longword.  In the case where length == 4 we compare the
    // same longword twice, but that's still faster than another
    // conditional branch.

    ldr(result, Address(str1));
    ldr(cnt1, Address(str2));
    eor(rscratch2, result, cnt1);
    cbz(rscratch2, LENGTH_DIFF);

    // Find the first different characters in the longwords and
    // compute their difference.
    bind(DIFFERENCE);
    rev(rscratch2, rscratch2);
    clz(rscratch2, rscratch2);
    andr(rscratch2, rscratch2, -16);
    lsrv(result, result, rscratch2);
    uxthw(result, result);
    lsrv(cnt1, cnt1, rscratch2);
    uxthw(cnt1, cnt1);
    subw(result, result, cnt1);
    b(DONE);
  }

  bind(SHORT_STRING);
  // Is the minimum length zero?
  cbz(cnt2, LENGTH_DIFF);

  bind(SHORT_LOOP);
  load_unsigned_short(result, Address(post(str1, 2)));
  load_unsigned_short(cnt1, Address(post(str2, 2)));
  subw(result, result, cnt1);
  cbnz(result, DONE);
  sub(cnt2, cnt2, 1);
  cbnz(cnt2, SHORT_LOOP);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF);
  mov(result, tmp1);

  // That's it
  bind(DONE);

  BLOCK_COMMENT("} string_compare");
}
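
// In effect, string_compare computes (illustrative C sketch over jchar
// arrays, not the real runtime entry point):
//
//   int compare(jchar* a, int la, jchar* b, int lb) {
//     int min = la < lb ? la : lb;
//     for (int i = 0; i < min; i++)
//       if (a[i] != b[i]) return a[i] - b[i];
//     return la - lb;   // the saved length difference
//   }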

void MacroAssembler::string_equals(Register str1, Register str2,
                                   Register cnt, Register result,
                                   Register tmp1) {
  Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING,
      NEXT_WORD;

  const Register tmp2 = rscratch1;
  assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2);

  BLOCK_COMMENT("string_equals {");

  // Start by assuming that the strings are not equal.
  mov(result, zr);

  // A very short string
  cmpw(cnt, 4);
  br(Assembler::LT, SHORT_STRING);

  // Check if the strings start at the same location.
  cmp(str1, str2);
  br(Assembler::EQ, SAME_CHARS);

  // Compare longwords
  {
    subw(cnt, cnt, 4); // The last longword is a special case

    // Move both string pointers to the last longword of their
    // strings, negate the remaining count, and convert it to bytes.
    lea(str1, Address(str1, cnt, Address::uxtw(1)));
    lea(str2, Address(str2, cnt, Address::uxtw(1)));
    sub(cnt, zr, cnt, LSL, 1);

    // Loop, loading longwords and comparing them into rscratch2.
    bind(NEXT_WORD);
    ldr(tmp1, Address(str1, cnt));
    ldr(tmp2, Address(str2, cnt));
    adds(cnt, cnt, wordSize);
    eor(rscratch2, tmp1, tmp2);
    cbnz(rscratch2, DONE);
    br(Assembler::LT, NEXT_WORD);

    // Last longword.  In the case where length == 4 we compare the
    // same longword twice, but that's still faster than another
    // conditional branch.

    ldr(tmp1, Address(str1));
    ldr(tmp2, Address(str2));
    eor(rscratch2, tmp1, tmp2);
    cbz(rscratch2, SAME_CHARS);
    b(DONE);
  }

  bind(SHORT_STRING);
  // Is the length zero?
  cbz(cnt, SAME_CHARS);

  bind(SHORT_LOOP);
  load_unsigned_short(tmp1, Address(post(str1, 2)));
  load_unsigned_short(tmp2, Address(post(str2, 2)));
  subw(tmp1, tmp1, tmp2);
  cbnz(tmp1, DONE);
  sub(cnt, cnt, 1);
  cbnz(cnt, SHORT_LOOP);

  // Strings are equal.
  bind(SAME_CHARS);
  mov(result, true);

  // That's it
  bind(DONE);

  BLOCK_COMMENT("} string_equals");
}
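
// In effect, char_arrays_equals below computes (illustrative C sketch):
//
//   bool equals(typeArrayOop a, typeArrayOop b) {
//     if (a == b) return true;
//     if (a == NULL || b == NULL) return false;
//     if (a->length() != b->length()) return false;
//     // 4 chars (8 bytes) per iteration, then 2-char and 1-char tails
//     return memcmp(a_base, b_base, 2 * a->length()) == 0;
//   }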

// Compare char[] arrays aligned to 4 bytes
void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
                                        Register result, Register tmp1)
{
  Register cnt1 = rscratch1;
  Register cnt2 = rscratch2;
  Register tmp2 = rscratch2;  // cnt2 is dead after the length check, so
                              // it can double as tmp2

  Label SAME, DIFFER, NEXT, TAIL03, TAIL01;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  BLOCK_COMMENT("char_arrays_equals {");

  // different until proven equal
  mov(result, false);

  // same array?
  cmp(ary1, ary2);
  br(Assembler::EQ, SAME);

  // ne if either null
  cbz(ary1, DIFFER);
  cbz(ary2, DIFFER);

  // lengths ne?
  ldrw(cnt1, Address(ary1, length_offset));
  ldrw(cnt2, Address(ary2, length_offset));
  cmp(cnt1, cnt2);
  br(Assembler::NE, DIFFER);

  lea(ary1, Address(ary1, base_offset));
  lea(ary2, Address(ary2, base_offset));

  subs(cnt1, cnt1, 4);
  br(LT, TAIL03);

  BIND(NEXT);
  ldr(tmp1, Address(post(ary1, 8)));
  ldr(tmp2, Address(post(ary2, 8)));
  subs(cnt1, cnt1, 4);
  eor(tmp1, tmp1, tmp2);
  cbnz(tmp1, DIFFER);
  br(GE, NEXT);

  BIND(TAIL03);  // 0-3 chars left, cnt1 = #chars left - 4
  tst(cnt1, 0b10);
  br(EQ, TAIL01);
  ldrw(tmp1, Address(post(ary1, 4)));
  ldrw(tmp2, Address(post(ary2, 4)));
  cmp(tmp1, tmp2);
  br(NE, DIFFER);
  BIND(TAIL01);  // 0-1 chars left
  tst(cnt1, 0b01);
  br(EQ, SAME);
  ldrh(tmp1, ary1);
  ldrh(tmp2, ary2);
  cmp(tmp1, tmp2);
  br(NE, DIFFER);

  BIND(SAME);
  mov(result, true);
  BIND(DIFFER); // result already set

  BLOCK_COMMENT("} char_arrays_equals");
}

// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst,
                                      Register len, Register result,
                                      FloatRegister Vtmp1, FloatRegister Vtmp2,
                                      FloatRegister Vtmp3, FloatRegister Vtmp4)
{
  Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1;
  Register tmp1 = rscratch1;

  mov(result, len); // Save initial len

#ifndef BUILTIN_SIM
  subs(len, len, 32);
  br(LT, LOOP_8);

  // The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions
  // to convert chars to bytes.  These set the 'QC' bit in the FPSR if
  // any char could not fit in a byte, so clear the FPSR so we can test it.
  clear_fpsr();

  BIND(NEXT_32);
  ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
  uqxtn(Vtmp1, T8B, Vtmp1, T8H);  // uqxtn  - write bottom half
  uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half
  uqxtn(Vtmp2, T8B, Vtmp3, T8H);
  uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2
  get_fpsr(tmp1);
  cbnzw(tmp1, LOOP_8);
  st1(Vtmp1, Vtmp2, T16B, post(dst, 32));
  subs(len, len, 32);
  add(src, src, 64);
  br(GE, NEXT_32);

  BIND(LOOP_8);
  adds(len, len, 32-8);
  br(LT, LOOP_1);
  clear_fpsr(); // QC may be set from the loop above, clear it again
  BIND(NEXT_8);
  ld1(Vtmp1, T8H, src);
  uqxtn(Vtmp1, T8B, Vtmp1, T8H);
  get_fpsr(tmp1);
  cbnzw(tmp1, LOOP_1);
  st1(Vtmp1, T8B, post(dst, 8));
  subs(len, len, 8);
  add(src, src, 16);
  br(GE, NEXT_8);

  BIND(LOOP_1);
  adds(len, len, 8);
  br(LE, DONE);
#else
  cbz(len, DONE);
#endif
  BIND(NEXT_1);
  ldrh(tmp1, Address(post(src, 2)));
  tst(tmp1, 0xff00);
  br(NE, DONE);
  strb(tmp1, Address(post(dst, 1)));
  subs(len, len, 1);
  br(GT, NEXT_1);

  BIND(DONE);
  sub(result, result, len); // Return index where we stopped
}
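
// In effect, encode_iso_array computes (scalar sketch of the SIMD code
// above; illustrative only):
//
//   int encode(jchar* src, jbyte* dst, int len) {
//     int i = 0;
//     for (; i < len; i++) {
//       jchar c = src[i];
//       if (c > 0xff) break;  // not ISO-8859-1: stop and report position
//       dst[i] = (jbyte)c;
//     }
//     return i;               // index where we stopped
//   }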