/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2015 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#ifdef ASSERT
// On RISC, there's no benefit to verifying instruction boundaries.
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
  assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
  if (Assembler::is_simm(si31, 16)) {
    ld(d, si31, a);
    if (emit_filler_nop) nop();
  } else {
    const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
    const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31);
    addis(d, a, hi);
    ld(d, lo, d);
  }
}

void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
  assert_different_registers(d, a);
  ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
}
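// Illustrative sketch (not emitted code): for an offset beyond simm16 the
// largeoffset_si16_si16_hi/_lo split presumably rounds the high half up so
// that the sign-extended low half compensates, e.g.
//   addis d, a, hi   // hi = (si31 + 0x8000) >> 16
//   ld    d, lo, d   // lo = (short)si31, sign-extended by the ld
// which reconstructs a + si31 in two instructions.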
void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base,
                                      size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
  case 8:              ld(dst, offs, base);                          break;
  case 4:  is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base);  break;
  case 2:  is_signed ? lha(dst, offs, base) : lhz(dst, offs, base);  break;
  case 1:  lbz(dst, offs, base); if (is_signed) extsb(dst, dst);     break; // lba doesn't exist :(
  default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base,
                                       size_t size_in_bytes) {
  switch (size_in_bytes) {
  case 8:  std(dst, offs, base); break;
  case 4:  stw(dst, offs, base); break;
  case 2:  sth(dst, offs, base); break;
  case 1:  stb(dst, offs, base); break;
  default: ShouldNotReachHere();
  }
}

void MacroAssembler::align(int modulus, int max, int rem) {
  int padding = (rem + modulus - (offset() % modulus)) % modulus;
  if (padding > max) return;
  for (int c = (padding >> 2); c > 0; --c) { nop(); }
}

// Issue instructions that calculate given TOC from global TOC.
void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
                                                       bool add_relocation, bool emit_dummy_addr) {
  int offset = -1;
  if (emit_dummy_addr) {
    offset = -128; // dummy address
  } else if (addr != (address)(intptr_t)-1) {
    offset = MacroAssembler::offset_to_global_toc(addr);
  }

  if (hi16) {
    addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
  }
  if (lo16) {
    if (add_relocation) {
      // Relocate at the addi to avoid confusion with a load from the method's TOC.
      relocate(internal_word_Relocation::spec(addr));
    }
    addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
  }
}

int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
  const int offset = MacroAssembler::offset_to_global_toc(addr);

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");

  // Now, find the preceding addis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
      // Stop, found the addis which writes dst.
      break;
    }
    inst1_addr -= BytesPerInstWord;
  }

  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
  set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
  set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
  return (int)((intptr_t)addr - (intptr_t)inst1_addr);
}
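// Sketch: the sequence emitted above and rewritten here is
//   addis dst, R29, offset_hi   // R29 holds the global TOC
//   addi  dst, dst, offset_lo
// patch_calculate_address_from_global_toc_at() locates the addis by scanning
// backwards from the relocated addi and patches both 16-bit immediates in place.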
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");

  // Now, find the preceding addis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
      // stop, found the addis which writes dst
      break;
    }
    inst1_addr -= BytesPerInstWord;
  }

  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");

  int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0);
  // -1 is a special case
  if (offset == -1) {
    return (address)(intptr_t)-1;
  } else {
    return global_toc() + offset;
  }
}

#ifdef _LP64
// Patch compressed oops or klass constants.
// Assembler sequence is
// 1) compressed oops:
//    lis  rx = const.hi
//    ori  rx = rx | const.lo
// 2) compressed klass:
//    lis    rx = const.hi
//    clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
//    ori    rx = rx | const.lo
// Clrldi will be passed by.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the ori,
  // and the ori reads and writes the same register dst.
  const int dst = inv_rta_field(inst2);
  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
  // Now, find the preceding lis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  bool inst1_found = false;
  while (inst1_addr >= bound) {
    inst1 = *(int *)inst1_addr;
    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
    inst1_addr -= BytesPerInstWord;
  }
  assert(inst1_found, "inst is not lis");

  int xc = (data >> 16) & 0xffff;
  int xd = (data >>  0) & 0xffff;

  set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
  set_imm((int *)inst2_addr, (xd));        // unsigned int
  return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
}
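// Sketch of the two-instruction constant patched above, for data = 0xAAAABBBB:
//   lis rx, 0xAAAA        // high 16 bits
//   ori rx, rx, 0xBBBB    // low 16 bits
// An optional clrldi between the two (compressed klass case, clearMS32b) is
// simply skipped by the backward scan for the lis.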
// Get compressed oop or klass constant.
narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the ori,
  // and the ori reads and writes the same register dst.
  const int dst = inv_rta_field(inst2);
  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");
  // Now, find the preceding lis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  bool inst1_found = false;

  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
    inst1_addr -= BytesPerInstWord;
  }
  assert(inst1_found, "inst is not lis");

  uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
  uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);

  return (int) (xl | xh);
}
#endif // _LP64

void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
  int toc_offset = 0;
  // Use RelocationHolder::none for the constant pool entry, otherwise
  // we will end up with a failing NativeCall::verify(x) where x is
  // the address of the constant pool entry.
  // FIXME: We should insert relocation information for oops at the constant
  // pool entries instead of inserting it at the loads; patching of a constant
  // pool entry should be less expensive.
  address oop_address = address_constant((address)a.value(), RelocationHolder::none);
  // Relocate at the pc of the load.
  relocate(a.rspec());
  toc_offset = (int)(oop_address - code()->consts()->start());
  ld_largeoffset_unchecked(dst, toc_offset, toc, true);
}

bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
  const address inst1_addr = a;
  const int inst1 = *(int *)inst1_addr;

  // The relocation points to the ld or the addis.
  return (is_ld(inst1)) ||
         (is_addis(inst1) && inv_ra_field(inst1) != 0);
}

int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
  assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");

  const address inst1_addr = a;
  const int inst1 = *(int *)inst1_addr;

  if (is_ld(inst1)) {
    return inv_d1_field(inst1);
  } else if (is_addis(inst1)) {
    const int dst = inv_rt_field(inst1);

    // Now, find the succeeding ld which reads and writes to dst.
    address inst2_addr = inst1_addr + BytesPerInstWord;
    int inst2 = 0;
    while (true) {
      inst2 = *(int *) inst2_addr;
      if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) {
        // Stop, found the ld which reads and writes dst.
        break;
      }
      inst2_addr += BytesPerInstWord;
    }
    return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
  }
  ShouldNotReachHere();
  return 0;
}

// Get the constant from a `load_const' sequence.
long MacroAssembler::get_const(address a) {
  assert(is_load_const_at(a), "not a load of a constant");
  const int *p = (const int*) a;
  unsigned long x = (((unsigned long) (get_imm(a,0) & 0xffff)) << 48);
  if (is_ori(*(p+1))) {
    x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 32);
    x |= (((unsigned long) (get_imm(a,3) & 0xffff)) << 16);
    x |= (((unsigned long) (get_imm(a,4) & 0xffff)));
  } else if (is_lis(*(p+1))) {
    x |= (((unsigned long) (get_imm(a,2) & 0xffff)) << 32);
    x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 16);
    x |= (((unsigned long) (get_imm(a,3) & 0xffff)));
  } else {
    ShouldNotReachHere();
    return (long) 0;
  }
  return (long) x;
}
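// Sketch: get_const() above and patch_const() below understand two
// `load_const' shapes, distinguished by the second instruction of the sequence:
//   second insn is ori:  immediates live in slots 0, 1, 3, 4 (slot 2 is a shift),
//   second insn is lis:  two independent halves, immediates in slots 0, 2, 1, 3.
// The slot numbers are exactly the get_imm()/set_imm() indices used in both functions.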
// Patch the 64 bit constant of a `load_const' sequence. This is a low
// level procedure. It neither flushes the instruction cache nor is it
// mt safe.
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const_at(a), "not a load of a constant");
  int *p = (int*) a;
  if (is_ori(*(p+1))) {
    set_imm(0 + p, (x >> 48) & 0xffff);
    set_imm(1 + p, (x >> 32) & 0xffff);
    set_imm(3 + p, (x >> 16) & 0xffff);
    set_imm(4 + p, x & 0xffff);
  } else if (is_lis(*(p+1))) {
    set_imm(0 + p, (x >> 48) & 0xffff);
    set_imm(2 + p, (x >> 32) & 0xffff);
    set_imm(1 + p, (x >> 16) & 0xffff);
    set_imm(3 + p, x & 0xffff);
  } else {
    ShouldNotReachHere();
  }
}

AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->allocate_metadata_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return AddressLiteral((address)obj, rspec);
}

AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return AddressLiteral((address)obj, rspec);
}

AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->allocate_oop_index(obj);
  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}

AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}

RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp, int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0) {
    return RegisterOrConstant(value + offset);
  }

  // Load indirectly to solve generation ordering problem.
  // static address, no relocation
  int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
  ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)

  if (offset != 0) {
    addi(tmp, tmp, offset);
  }

  return RegisterOrConstant(tmp);
}

#ifndef PRODUCT
void MacroAssembler::pd_print_patched_instruction(address branch) {
  Unimplemented(); // TODO: PPC port
}
#endif // ndef PRODUCT
// Conditional far branch for destinations encodable in 24+2 bits.
void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {

  // If requested by flag optimize, relocate the bc_far as a
  // runtime_call and prepare for optimizing it when the code gets
  // relocated.
  if (optimize == bc_far_optimize_on_relocate) {
    relocate(relocInfo::runtime_call_type);
  }

  // variant 2:
  //
  //    b!cxx SKIP
  //    bxx   DEST
  //  SKIP:
  //

  const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
                                                opposite_bcond(inv_boint_bcond(boint)));

  // We emit two branches.
  // First, a conditional branch which jumps around the far branch.
  const address not_taken_pc = pc() + 2 * BytesPerInstWord;
  const address bc_pc        = pc();
  bc(opposite_boint, biint, not_taken_pc);

  const int bc_instr = *(int*)bc_pc;
  assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition");
  assert(opposite_boint == inv_bo_field(bc_instr), "postcondition");
  assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))),
                                     opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))),
         "postcondition");
  assert(biint == inv_bi_field(bc_instr), "postcondition");

  // Second, an unconditional far branch which jumps to dest.
  // Note: target(dest) remembers the current pc (see CodeSection::target)
  //       and returns the current pc if the label is not bound yet; when
  //       the label gets bound, the unconditional far branch will be patched.
  const address target_pc = target(dest);
  const address b_pc      = pc();
  b(target_pc);

  assert(not_taken_pc == pc(),                 "postcondition");
  assert(dest.is_bound() || target_pc == b_pc, "postcondition");
}

bool MacroAssembler::is_bc_far_at(address instruction_addr) {
  return is_bc_far_variant1_at(instruction_addr) ||
         is_bc_far_variant2_at(instruction_addr) ||
         is_bc_far_variant3_at(instruction_addr);
}

address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) {
  if (is_bc_far_variant1_at(instruction_addr)) {
    const address instruction_1_addr = instruction_addr;
    const int instruction_1 = *(int*)instruction_1_addr;
    return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr);
  } else if (is_bc_far_variant2_at(instruction_addr)) {
    const address instruction_2_addr = instruction_addr + 4;
    return bxx_destination(instruction_2_addr);
  } else if (is_bc_far_variant3_at(instruction_addr)) {
    return instruction_addr + 8;
  }
  // variant 4 ???
  ShouldNotReachHere();
  return NULL;
}
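// Sketch of the three bc_far shapes handled above and below (each occupies
// two instruction words, so they can be patched into one another):
//   variant 1:  bcxx  DEST ; endgroup           -- destination in insn 1
//   variant 2:  b!cxx SKIP ; bxx DEST ; SKIP:   -- destination in insn 2
//   variant 3:  nop        ; endgroup           -- branch to next insn, patched away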
void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) {

  if (is_bc_far_variant3_at(instruction_addr)) {
    // variant 3, far cond branch to the next instruction, already patched to nops:
    //
    //    nop
    //    endgroup
    //  SKIP/DEST:
    //
    return;
  }

  // first, extract boint and biint from the current branch
  int boint = 0;
  int biint = 0;

  ResourceMark rm;
  const int code_size = 2 * BytesPerInstWord;
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
    // Far branch to next instruction: Optimize it by patching nops (produce variant 3).
    masm.nop();
    masm.endgroup();
  } else {
    if (is_bc_far_variant1_at(instruction_addr)) {
      // variant 1, the 1st instruction contains the destination address:
      //
      //    bcxx  DEST
      //    endgroup
      //
      const int instruction_1 = *(int*)(instruction_addr);
      boint = inv_bo_field(instruction_1);
      biint = inv_bi_field(instruction_1);
    } else if (is_bc_far_variant2_at(instruction_addr)) {
      // variant 2, the 2nd instruction contains the destination address:
      //
      //    b!cxx SKIP
      //    bxx   DEST
      //  SKIP:
      //
      const int instruction_1 = *(int*)(instruction_addr);
      boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
                                 opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
      biint = inv_bi_field(instruction_1);
    } else {
      // variant 4???
      ShouldNotReachHere();
    }

    // second, set the new branch destination and optimize the code
    if (dest != instruction_addr + 4 && // the bc_far is still unbound!
        masm.is_within_range_of_bcxx(dest, instruction_addr)) {
      // variant 1:
      //
      //    bcxx  DEST
      //    endgroup
      //
      masm.bc(boint, biint, dest);
      masm.endgroup();
    } else {
      // variant 2:
      //
      //    b!cxx SKIP
      //    bxx   DEST
      //  SKIP:
      //
      const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
                                                    opposite_bcond(inv_boint_bcond(boint)));
      const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
      masm.bc(opposite_boint, biint, not_taken_pc);
      masm.b(dest);
    }
  }
  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
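// Sketch: set_dest_of_bc_far_at() re-assembles the two-word slot in place by
// pointing a CodeBuffer at instruction_addr, then flushes the icache for
// exactly those bytes. Like patch_const() above, the rewrite itself is
// presumably not mt-safe; callers must arrange any needed synchronization.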
// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
  // get current pc
  uint64_t start_pc = (uint64_t) pc();

  const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last
  const address pc_of_b  = (address) (start_pc + (0*BytesPerInstWord)); // b is first

  // relocate here
  if (rt != relocInfo::none) {
    relocate(rt);
  }

  if ( ReoptimizeCallSequences &&
       (( link && is_within_range_of_b(dest, pc_of_bl)) ||
        (!link && is_within_range_of_b(dest, pc_of_b)))) {
    // variant 2:
    // Emit an optimized, pc-relative call/jump.

    if (link) {
      // some padding
      nop();
      nop();
      nop();
      nop();
      nop();
      nop();

      // do the call
      assert(pc() == pc_of_bl, "just checking");
      bl(dest, relocInfo::none);
    } else {
      // do the jump
      assert(pc() == pc_of_b, "just checking");
      b(dest, relocInfo::none);

      // some padding
      nop();
      nop();
      nop();
      nop();
      nop();
      nop();
    }

    // Assert that we can identify the emitted call/jump.
    assert(is_bxx64_patchable_variant2_at((address)start_pc, link),
           "can't identify emitted call");
  } else {
    // variant 1:
#if defined(ABI_ELFv2)
    nop();
    calculate_address_from_global_toc(R12, dest, true, true, false);
    mtctr(R12);
    nop();
    nop();
#else
    mr(R0, R11);  // spill R11 -> R0.

    // Load the destination address into CTR,
    // calculate destination relative to global toc.
    calculate_address_from_global_toc(R11, dest, true, true, false);

    mtctr(R11);
    mr(R11, R0);  // spill R11 <- R0.
    nop();
#endif

    // do the call/jump
    if (link) {
      bctrl();
    } else {
      bctr();
    }
    // Assert that we can identify the emitted call/jump.
    assert(is_bxx64_patchable_variant1b_at((address)start_pc, link),
           "can't identify emitted call");
  }

  // Assert that we can identify the emitted call/jump.
  assert(is_bxx64_patchable_at((address)start_pc, link),
         "can't identify emitted call");
  assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest,
         "wrong encoding of dest address");
}

// Identify a bxx64_patchable instruction.
bool MacroAssembler::is_bxx64_patchable_at(address instruction_addr, bool link) {
  return is_bxx64_patchable_variant1b_at(instruction_addr, link)
    //|| is_bxx64_patchable_variant1_at(instruction_addr, link)
      || is_bxx64_patchable_variant2_at(instruction_addr, link);
}

// Does the call64_patchable instruction use a pc-relative encoding of
// the call destination?
bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) {
  // variant 2 is pc-relative
  return is_bxx64_patchable_variant2_at(instruction_addr, link);
}

// Identify variant 1.
bool MacroAssembler::is_bxx64_patchable_variant1_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
      && is_mtctr(instr[5]) // mtctr
      && is_load_const_at(instruction_addr);
}

// Identify variant 1b: load destination relative to global toc.
bool MacroAssembler::is_bxx64_patchable_variant1b_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
      && is_mtctr(instr[3]) // mtctr
      && is_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, instruction_addr);
}

// Identify variant 2.
bool MacroAssembler::is_bxx64_patchable_variant2_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  if (link) {
    return is_bl (instr[6])  // bl dest is last
        && is_nop(instr[0])  // nop
        && is_nop(instr[1])  // nop
        && is_nop(instr[2])  // nop
        && is_nop(instr[3])  // nop
        && is_nop(instr[4])  // nop
        && is_nop(instr[5]); // nop
  } else {
    return is_b  (instr[0])  // b dest is first
        && is_nop(instr[1])  // nop
        && is_nop(instr[2])  // nop
        && is_nop(instr[3])  // nop
        && is_nop(instr[4])  // nop
        && is_nop(instr[5])  // nop
        && is_nop(instr[6]); // nop
  }
}

// Set dest address of a bxx64_patchable instruction.
void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, address dest, bool link) {
  ResourceMark rm;
  int code_size = MacroAssembler::bxx64_patchable_size;
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  masm.bxx64_patchable(dest, relocInfo::none, link);
  ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
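// Sketch of the fixed 7-word bxx64_patchable slot, in the shapes tested above:
//   variant 1b (toc-relative): spill/nop ; addis+addi (global toc) ; mtctr ; fill ; bctr[l]
//   variant 2  (pc-relative):  6 nops then bl DEST (link) / b DEST then 6 nops (no link)
// Both shapes occupy bxx64_patchable_size bytes, so set_dest_of_bxx64_patchable_at()
// can overwrite one with the other when the destination moves in or out of range.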
// Get dest address of a bxx64_patchable instruction.
address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) {
  if (is_bxx64_patchable_variant1_at(instruction_addr, link)) {
    return (address) (unsigned long) get_const(instruction_addr);
  } else if (is_bxx64_patchable_variant2_at(instruction_addr, link)) {
    unsigned int* instr = (unsigned int*) instruction_addr;
    if (link) {
      const int instr_idx = 6; // bl is last
      int branchoffset = branch_destination(instr[instr_idx], 0);
      return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
    } else {
      const int instr_idx = 0; // b is first
      int branchoffset = branch_destination(instr[instr_idx], 0);
      return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
    }
  // Load dest relative to global toc.
  } else if (is_bxx64_patchable_variant1b_at(instruction_addr, link)) {
    return get_address_of_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord,
                                                               instruction_addr);
  } else {
    ShouldNotReachHere();
    return NULL;
  }
}

// Uses ordering which corresponds to ABI:
//    _savegpr0_14:  std  r14,-144(r1)
//    _savegpr0_15:  std  r15,-136(r1)
//    _savegpr0_16:  std  r16,-128(r1)
void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) {
  std(R14, offset, dst);   offset += 8;
  std(R15, offset, dst);   offset += 8;
  std(R16, offset, dst);   offset += 8;
  std(R17, offset, dst);   offset += 8;
  std(R18, offset, dst);   offset += 8;
  std(R19, offset, dst);   offset += 8;
  std(R20, offset, dst);   offset += 8;
  std(R21, offset, dst);   offset += 8;
  std(R22, offset, dst);   offset += 8;
  std(R23, offset, dst);   offset += 8;
  std(R24, offset, dst);   offset += 8;
  std(R25, offset, dst);   offset += 8;
  std(R26, offset, dst);   offset += 8;
  std(R27, offset, dst);   offset += 8;
  std(R28, offset, dst);   offset += 8;
  std(R29, offset, dst);   offset += 8;
  std(R30, offset, dst);   offset += 8;
  std(R31, offset, dst);   offset += 8;

  stfd(F14, offset, dst);   offset += 8;
  stfd(F15, offset, dst);   offset += 8;
  stfd(F16, offset, dst);   offset += 8;
  stfd(F17, offset, dst);   offset += 8;
  stfd(F18, offset, dst);   offset += 8;
  stfd(F19, offset, dst);   offset += 8;
  stfd(F20, offset, dst);   offset += 8;
  stfd(F21, offset, dst);   offset += 8;
  stfd(F22, offset, dst);   offset += 8;
  stfd(F23, offset, dst);   offset += 8;
  stfd(F24, offset, dst);   offset += 8;
  stfd(F25, offset, dst);   offset += 8;
  stfd(F26, offset, dst);   offset += 8;
  stfd(F27, offset, dst);   offset += 8;
  stfd(F28, offset, dst);   offset += 8;
  stfd(F29, offset, dst);   offset += 8;
  stfd(F30, offset, dst);   offset += 8;
  stfd(F31, offset, dst);
}
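// Sketch: the layout above mirrors the ELF ABI _savegpr0_NN millicode (R14 in
// the lowest slot, 8 bytes apart, FPRs following the GPRs); presumably this
// keeps frames unwindable by ABI-conforming tools. restore_nonvolatile_gprs()
// below must walk exactly the same offsets.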
// Uses ordering which corresponds to ABI:
//    _restgpr0_14:  ld   r14,-144(r1)
//    _restgpr0_15:  ld   r15,-136(r1)
//    _restgpr0_16:  ld   r16,-128(r1)
void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) {
  ld(R14, offset, src);   offset += 8;
  ld(R15, offset, src);   offset += 8;
  ld(R16, offset, src);   offset += 8;
  ld(R17, offset, src);   offset += 8;
  ld(R18, offset, src);   offset += 8;
  ld(R19, offset, src);   offset += 8;
  ld(R20, offset, src);   offset += 8;
  ld(R21, offset, src);   offset += 8;
  ld(R22, offset, src);   offset += 8;
  ld(R23, offset, src);   offset += 8;
  ld(R24, offset, src);   offset += 8;
  ld(R25, offset, src);   offset += 8;
  ld(R26, offset, src);   offset += 8;
  ld(R27, offset, src);   offset += 8;
  ld(R28, offset, src);   offset += 8;
  ld(R29, offset, src);   offset += 8;
  ld(R30, offset, src);   offset += 8;
  ld(R31, offset, src);   offset += 8;

  // FP registers
  lfd(F14, offset, src);   offset += 8;
  lfd(F15, offset, src);   offset += 8;
  lfd(F16, offset, src);   offset += 8;
  lfd(F17, offset, src);   offset += 8;
  lfd(F18, offset, src);   offset += 8;
  lfd(F19, offset, src);   offset += 8;
  lfd(F20, offset, src);   offset += 8;
  lfd(F21, offset, src);   offset += 8;
  lfd(F22, offset, src);   offset += 8;
  lfd(F23, offset, src);   offset += 8;
  lfd(F24, offset, src);   offset += 8;
  lfd(F25, offset, src);   offset += 8;
  lfd(F26, offset, src);   offset += 8;
  lfd(F27, offset, src);   offset += 8;
  lfd(F28, offset, src);   offset += 8;
  lfd(F29, offset, src);   offset += 8;
  lfd(F30, offset, src);   offset += 8;
  lfd(F31, offset, src);
}

// For verify_oops.
void MacroAssembler::save_volatile_gprs(Register dst, int offset) {
  std(R2,  offset, dst);   offset += 8;
  std(R3,  offset, dst);   offset += 8;
  std(R4,  offset, dst);   offset += 8;
  std(R5,  offset, dst);   offset += 8;
  std(R6,  offset, dst);   offset += 8;
  std(R7,  offset, dst);   offset += 8;
  std(R8,  offset, dst);   offset += 8;
  std(R9,  offset, dst);   offset += 8;
  std(R10, offset, dst);   offset += 8;
  std(R11, offset, dst);   offset += 8;
  std(R12, offset, dst);
}

// For verify_oops.
void MacroAssembler::restore_volatile_gprs(Register src, int offset) {
  ld(R2,  offset, src);   offset += 8;
  ld(R3,  offset, src);   offset += 8;
  ld(R4,  offset, src);   offset += 8;
  ld(R5,  offset, src);   offset += 8;
  ld(R6,  offset, src);   offset += 8;
  ld(R7,  offset, src);   offset += 8;
  ld(R8,  offset, src);   offset += 8;
  ld(R9,  offset, src);   offset += 8;
  ld(R10, offset, src);   offset += 8;
  ld(R11, offset, src);   offset += 8;
  ld(R12, offset, src);
}

void MacroAssembler::save_LR_CR(Register tmp) {
  mfcr(tmp);
  std(tmp, _abi(cr), R1_SP);
  mflr(tmp);
  std(tmp, _abi(lr), R1_SP);
  // Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad)
}

void MacroAssembler::restore_LR_CR(Register tmp) {
  assert(tmp != R1_SP, "must be distinct");
  ld(tmp, _abi(lr), R1_SP);
  mtlr(tmp);
  ld(tmp, _abi(cr), R1_SP);
  mtcr(tmp);
}

address MacroAssembler::get_PC_trash_LR(Register result) {
  Label L;
  bl(L);
  bind(L);
  address lr_pc = pc();
  mflr(result);
  return lr_pc;
}

void MacroAssembler::resize_frame(Register offset, Register tmp) {
#ifdef ASSERT
  assert_different_registers(offset, tmp, R1_SP);
  andi_(tmp, offset, frame::alignment_in_bytes-1);
  asm_assert_eq("resize_frame: unaligned", 0x204);
#endif

  // tmp <- *(SP)
  ld(tmp, _abi(callers_sp), R1_SP);
  // addr <- SP + offset;
  // *(addr) <- tmp;
  // SP <- addr
  stdux(tmp, R1_SP, offset);
}

void MacroAssembler::resize_frame(int offset, Register tmp) {
  assert(is_simm(offset, 16), "too big an offset");
  assert_different_registers(tmp, R1_SP);
  assert((offset & (frame::alignment_in_bytes-1)) == 0, "resize_frame: unaligned");
  // tmp <- *(SP)
  ld(tmp, _abi(callers_sp), R1_SP);
  // addr <- SP + offset;
  // *(addr) <- tmp;
  // SP <- addr
  stdu(tmp, offset, R1_SP);
}
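// Sketch: both resize_frame() flavors keep the frame chain valid by combining
// the back-link store and the SP update in one store-with-update,
//   ld      tmp, 0(R1_SP)          // old back link
//   stdu[x] tmp, offset(R1_SP)     // write it to the new top, move SP atomically
// so R1_SP always points at a frame with an intact back link, even if a
// signal arrives between the two instructions.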
void MacroAssembler::resize_frame_absolute(Register addr, Register tmp1, Register tmp2) {
  // (addr == tmp1) || (addr == tmp2) is allowed here!
  assert(tmp1 != tmp2, "must be distinct");

  // compute offset w.r.t. current stack pointer
  // tmp_1 <- addr - SP (!)
  subf(tmp1, R1_SP, addr);

  // atomically update SP keeping back link.
  resize_frame(tmp1/* offset */, tmp2/* tmp */);
}

void MacroAssembler::push_frame(Register bytes, Register tmp) {
#ifdef ASSERT
  assert(bytes != R0, "r0 not allowed here");
  andi_(R0, bytes, frame::alignment_in_bytes-1);
  asm_assert_eq("push_frame(Reg, Reg): unaligned", 0x203);
#endif
  neg(tmp, bytes);
  stdux(R1_SP, R1_SP, tmp);
}

// Push a frame of size `bytes'.
void MacroAssembler::push_frame(unsigned int bytes, Register tmp) {
  long offset = align_addr(bytes, frame::alignment_in_bytes);
  if (is_simm(-offset, 16)) {
    stdu(R1_SP, -offset, R1_SP);
  } else {
    load_const(tmp, -offset);
    stdux(R1_SP, R1_SP, tmp);
  }
}

// Push a frame of size `bytes' plus abi_reg_args on top.
void MacroAssembler::push_frame_reg_args(unsigned int bytes, Register tmp) {
  push_frame(bytes + frame::abi_reg_args_size, tmp);
}

// Set up a new C frame with a spill area for non-volatile GPRs and
// additional space for local variables.
void MacroAssembler::push_frame_reg_args_nonvolatiles(unsigned int bytes,
                                                      Register tmp) {
  push_frame(bytes + frame::abi_reg_args_size + frame::spill_nonvolatiles_size, tmp);
}

// Pop current C frame.
void MacroAssembler::pop_frame() {
  ld(R1_SP, _abi(callers_sp), R1_SP);
}

#if defined(ABI_ELFv2)
address MacroAssembler::branch_to(Register r_function_entry, bool and_link) {
  // TODO(asmundak): make sure the caller uses R12 as function descriptor
  // most of the times.
  if (R12 != r_function_entry) {
    mr(R12, r_function_entry);
  }
  mtctr(R12);
  // Do a call or a branch.
  if (and_link) {
    bctrl();
  } else {
    bctr();
  }
  _last_calls_return_pc = pc();

  return _last_calls_return_pc;
}

// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address MacroAssembler::call_c(Register r_function_entry) {
  return branch_to(r_function_entry, /*and_link=*/true);
}

// For tail calls: only branch, don't link, so callee returns to caller of this function.
address MacroAssembler::call_c_and_return_to_caller(Register r_function_entry) {
  return branch_to(r_function_entry, /*and_link=*/false);
}

address MacroAssembler::call_c(address function_entry, relocInfo::relocType rt) {
  load_const(R12, function_entry, R0);
  return branch_to(R12, /*and_link=*/true);
}
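// Sketch: under ABI_ELFv2 (above) a C function is entered directly through its
// entry point, with R12 conventionally holding that address; under the
// descriptor-based ABI (below) every call goes through a FunctionDescriptor
// of { entry, toc, env } instead.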
#else
// Generic version of a call to C function via a function descriptor
// with variable support for C calling conventions (TOC, ENV, etc.).
// Updates and returns _last_calls_return_pc.
address MacroAssembler::branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
                                  bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee) {
  // we emit standard ptrgl glue code here
  assert((function_descriptor != R0), "function_descriptor cannot be R0");

  // retrieve necessary entries from the function descriptor
  ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor);
  mtctr(R0);

  if (load_toc_of_callee) {
    ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor);
  }
  if (load_env_of_callee) {
    ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor);
  } else if (load_toc_of_callee) {
    li(R11, 0);
  }

  // do a call or a branch
  if (and_link) {
    bctrl();
  } else {
    bctr();
  }
  _last_calls_return_pc = pc();

  return _last_calls_return_pc;
}

// Call a C function via a function descriptor and use full C calling
// conventions.
// We don't use the TOC in generated code, so there is no need to save
// and restore its value.
address MacroAssembler::call_c(Register fd) {
  return branch_to(fd, /*and_link=*/true,
                       /*save toc=*/false,
                       /*restore toc=*/false,
                       /*load toc=*/true,
                       /*load env=*/true);
}

address MacroAssembler::call_c_and_return_to_caller(Register fd) {
  return branch_to(fd, /*and_link=*/false,
                       /*save toc=*/false,
                       /*restore toc=*/false,
                       /*load toc=*/true,
                       /*load env=*/true);
}
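// Sketch of the ptrgl-style glue emitted by branch_to() above:
//   ld    R0,  entry(fd) ; mtctr R0
//   ld    R2,  toc(fd)        // if load_toc_of_callee
//   ld    R11, env(fd)        // if load_env_of_callee, else li R11, 0
//   bctr[l]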
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
  if (rt != relocInfo::none) {
    // this call needs to be relocatable
    if (!ReoptimizeCallSequences
        || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
        || fd == NULL   // support code-size estimation
        || !fd->is_friend_function()
        || fd->entry() == NULL) {
      // it's not a friend function as defined by class FunctionDescriptor,
      // so do a full call-c here.
      load_const(R11, (address)fd, R0);

      bool has_env = (fd != NULL && fd->env() != NULL);
      return branch_to(R11, /*and_link=*/true,
                            /*save toc=*/false,
                            /*restore toc=*/false,
                            /*load toc=*/true,
                            /*load env=*/has_env);
    } else {
      // It's a friend function. Load the entry point and don't care about
      // toc and env. Use an optimizable call instruction, but ensure the
      // same code-size as in the case of a non-friend function.
      nop();
      nop();
      nop();
      bl64_patchable(fd->entry(), rt);
      _last_calls_return_pc = pc();
      return _last_calls_return_pc;
    }
  } else {
    // This call does not need to be relocatable, do more aggressive
    // optimizations.
    if (!ReoptimizeCallSequences
        || !fd->is_friend_function()) {
      // It's not a friend function as defined by class FunctionDescriptor,
      // so do a full call-c here.
      load_const(R11, (address)fd, R0);
      return branch_to(R11, /*and_link=*/true,
                            /*save toc=*/false,
                            /*restore toc=*/false,
                            /*load toc=*/true,
                            /*load env=*/true);
    } else {
      // it's a friend function, load the entry point and don't care about
      // toc and env.
      address dest = fd->entry();
      if (is_within_range_of_b(dest, pc())) {
        bl(dest);
      } else {
        bl64_patchable(dest, rt);
      }
      _last_calls_return_pc = pc();
      return _last_calls_return_pc;
    }
  }
}

// Call a C function. All constants needed reside in TOC.
//
// Read the address to call from the TOC.
// Read env from TOC, if fd specifies an env.
// Read new TOC from TOC.
address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
                                         relocInfo::relocType rt, Register toc) {
  if (!ReoptimizeCallSequences
      || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
      || !fd->is_friend_function()) {
    // It's not a friend function as defined by class FunctionDescriptor,
    // so do a full call-c here.
    assert(fd->entry() != NULL, "function must be linked");

    AddressLiteral fd_entry(fd->entry());
    load_const_from_method_toc(R11, fd_entry, toc);
    mtctr(R11);
    if (fd->env() == NULL) {
      li(R11, 0);
      nop();
    } else {
      AddressLiteral fd_env(fd->env());
      load_const_from_method_toc(R11, fd_env, toc);
    }
    AddressLiteral fd_toc(fd->toc());
    load_toc_from_toc(R2_TOC, fd_toc, toc);
    // R2_TOC is killed.
    bctrl();
    _last_calls_return_pc = pc();
  } else {
    // It's a friend function, load the entry point and don't care about
    // toc and env. Use an optimizable call instruction, but ensure the
    // same code-size as in the case of a non-friend function.
    nop();
    bl64_patchable(fd->entry(), rt);
    _last_calls_return_pc = pc();
  }
  return _last_calls_return_pc;
}
#endif // ABI_ELFv2

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
                                  address  entry_point,
                                  bool     check_exceptions) {
  BLOCK_COMMENT("call_VM {");
  // Determine last_java_sp register.
  if (!last_java_sp->is_valid()) {
    last_java_sp = R1_SP;
  }
  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, R11_scratch1);

  // ARG1 must hold thread address.
  mr(R3_ARG1, R16_thread);
#if defined(ABI_ELFv2)
  address return_pc = call_c(entry_point, relocInfo::none);
#else
  address return_pc = call_c((FunctionDescriptor*)entry_point, relocInfo::none);
#endif

  reset_last_Java_frame();

  // Check for pending exceptions.
  if (check_exceptions) {
    // We don't check for exceptions here.
    ShouldNotReachHere();
  }

  // Get oop result if there is one and reset the value in the thread.
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }

  _last_calls_return_pc = return_pc;
  BLOCK_COMMENT("} call_VM");
}

void MacroAssembler::call_VM_leaf_base(address entry_point) {
  BLOCK_COMMENT("call_VM_leaf {");
#if defined(ABI_ELFv2)
  call_c(entry_point, relocInfo::none);
#else
  call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::none);
#endif
  BLOCK_COMMENT("} call_VM_leaf");
}
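// Sketch: call_VM_base() above implements the usual generated-code -> VM
// transition: record last_Java_sp, pass R16_thread as ARG1, call the entry
// point, reset the last Java frame, and fetch the oop result from the thread
// if the caller wants one.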
void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_base(oop_result, noreg, entry_point, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1,
                             bool check_exceptions) {
  // R3_ARG1 is reserved for the thread.
  mr_if_needed(R4_ARG2, arg_1);
  call_VM(oop_result, entry_point, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                             bool check_exceptions) {
  // R3_ARG1 is reserved for the thread
  mr_if_needed(R4_ARG2, arg_1);
  assert(arg_2 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_2);
  call_VM(oop_result, entry_point, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3,
                             bool check_exceptions) {
  // R3_ARG1 is reserved for the thread
  mr_if_needed(R4_ARG2, arg_1);
  assert(arg_2 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_2);
  mr_if_needed(R6_ARG4, arg_3);
  call_VM(oop_result, entry_point, check_exceptions);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_base(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  mr_if_needed(R3_ARG1, arg_1);
  call_VM_leaf(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  mr_if_needed(R3_ARG1, arg_1);
  assert(arg_2 != R3_ARG1, "smashed argument");
  mr_if_needed(R4_ARG2, arg_2);
  call_VM_leaf(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  mr_if_needed(R3_ARG1, arg_1);
  assert(arg_2 != R3_ARG1, "smashed argument");
  mr_if_needed(R4_ARG2, arg_2);
  assert(arg_3 != R3_ARG1 && arg_3 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_3);
  call_VM_leaf(entry_point);
}
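// Sketch: the argument movers above rely on mr_if_needed() plus the
// "smashed argument" asserts to keep incoming registers alive until read;
// e.g. the two-argument call_VM moves arg_1 into R4_ARG2 first, so arg_2 must
// not already live there.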
// Check whether instruction is a read access to the polling page
// which was emitted by load_from_polling_page(..).
bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext,
                                               address* polling_address_ptr) {
  if (!is_ld(instruction))
    return false; // It's not a ld. Fail.

  int rt = inv_rt_field(instruction);
  int ra = inv_ra_field(instruction);
  int ds = inv_ds_field(instruction);
  if (!(ds == 0 && ra != 0 && rt == 0)) {
    return false; // It's not a ld(r0, X, ra). Fail.
  }

  if (!ucontext) {
    // Set polling address.
    if (polling_address_ptr != NULL) {
      *polling_address_ptr = NULL;
    }
    return true; // No ucontext given. Can't check value of ra. Assume true.
  }

#ifdef LINUX
  // Ucontext given. Check that register ra contains the address of
  // the safepoint polling page.
  ucontext_t* uc = (ucontext_t*) ucontext;
  // Set polling address.
  address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds;
  if (polling_address_ptr != NULL) {
    *polling_address_ptr = addr;
  }
  return os::is_poll_address(addr);
#else
  // Not on Linux, ucontext must be NULL.
  ShouldNotReachHere();
  return false;
#endif
}

bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
#ifdef LINUX
  ucontext_t* uc = (ucontext_t*) ucontext;

  if (is_stwx(instruction) || is_stwux(instruction)) {
    int ra = inv_ra_field(instruction);
    int rb = inv_rb_field(instruction);

    // look up content of ra and rb in ucontext
    address ra_val = (address)uc->uc_mcontext.regs->gpr[ra];
    long    rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
    return os::is_memory_serialize_page(thread, ra_val + rb_val);
  } else if (is_stw(instruction) || is_stwu(instruction)) {
    int ra = inv_ra_field(instruction);
    int d1 = inv_d1_field(instruction);

    // look up content of ra in ucontext
    address ra_val = (address)uc->uc_mcontext.regs->gpr[ra];
    return os::is_memory_serialize_page(thread, ra_val + d1);
  } else {
    return false;
  }
#else
  // workaround not needed on !LINUX :-)
  ShouldNotCallThis();
  return false;
#endif
}

void MacroAssembler::bang_stack_with_offset(int offset) {
  // When increasing the stack, the old stack pointer will be written
  // to the new top of stack according to the PPC64 abi.
  // Therefore, stack banging is not necessary when increasing
  // the stack by <= os::vm_page_size() bytes.
  // When increasing the stack by a larger amount, this method is
  // called repeatedly to bang the intermediate pages.

  // Stack grows down, caller passes positive offset.
  assert(offset > 0, "must bang with positive offset");

  long stdoffset = -offset;

  if (is_simm(stdoffset, 16)) {
    // Signed 16 bit offset, a simple std is ok.
    if (UseLoadInstructionsForStackBangingPPC64) {
      ld(R0, (int)(signed short)stdoffset, R1_SP);
    } else {
      std(R0, (int)(signed short)stdoffset, R1_SP);
    }
  } else if (is_simm(stdoffset, 31)) {
    const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
    const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);

    Register tmp = R11;
    addis(tmp, R1_SP, hi);
    if (UseLoadInstructionsForStackBangingPPC64) {
      ld(R0, lo, tmp);
    } else {
      std(R0, lo, tmp);
    }
  } else {
    ShouldNotReachHere();
  }
}
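// Sketch: a bang is a dummy std (or ld, if UseLoadInstructionsForStackBangingPPC64)
// at SP - offset; for offsets beyond simm16 the addis/std pair above splits
// the displacement exactly like ld_largeoffset_unchecked() near the top of
// this file.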
// If instruction is a stack bang of the form
//    std    R0,    x(Ry),       (see bang_stack_with_offset())
//    stdu   R1_SP, x(R1_SP),    (see push_frame(), resize_frame())
// or stdux  R1_SP, Rx, R1_SP    (see push_frame(), resize_frame())
// return the banged address. Otherwise, return 0.
address MacroAssembler::get_stack_bang_address(int instruction, void *ucontext) {
#ifdef LINUX
  ucontext_t* uc = (ucontext_t*) ucontext;
  int rs = inv_rs_field(instruction);
  int ra = inv_ra_field(instruction);
  if (   (is_ld(instruction)   && rs == 0 &&  UseLoadInstructionsForStackBangingPPC64)
      || (is_std(instruction)  && rs == 0 && !UseLoadInstructionsForStackBangingPPC64)
      || (is_stdu(instruction) && rs == 1)) {
    int ds = inv_ds_field(instruction);
    // return banged address
    return ds + (address)uc->uc_mcontext.regs->gpr[ra];
  } else if (is_stdux(instruction) && rs == 1) {
    int rb = inv_rb_field(instruction);
    address sp = (address)uc->uc_mcontext.regs->gpr[1];
    long rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
    return ra != 1 || rb_val >= 0 ? NULL         // not a stack bang
                                  : sp + rb_val; // banged address
  }
  return NULL; // not a stack bang
#else
  // workaround not needed on !LINUX :-)
  ShouldNotCallThis();
  return NULL;
#endif
}
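// Sketch: given the ucontext of a faulting store/load, get_stack_bang_address()
// above re-computes the effective address from the register values saved in
// uc_mcontext and returns NULL when the instruction is not one of the three
// bang shapes listed in the comment.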
// CmpxchgX sets condition register to cmpX(current, compare).
void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
                              Register compare_value, Register exchange_value,
                              Register addr_base, int semantics, bool cmpxchgx_hint,
                              Register int_flag_success, bool contention_hint) {
  Label retry;
  Label failed;
  Label done;

  // Save one branch if result is returned via register and
  // result register is different from the other ones.
  bool use_result_reg    = (int_flag_success != noreg);
  bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
                            int_flag_success != exchange_value && int_flag_success != addr_base);

  // release/fence semantics
  if (semantics & MemBarRel) {
    release();
  }

  if (use_result_reg && preset_result_reg) {
    li(int_flag_success, 0); // preset (assume cas failed)
  }

  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
  if (contention_hint) { // Don't try to reserve if cmp fails.
    lwz(dest_current_value, 0, addr_base);
    cmpw(flag, dest_current_value, compare_value);
    bne(flag, failed);
  }

  // atomic emulation loop
  bind(retry);

  lwarx(dest_current_value, addr_base, cmpxchgx_hint);
  cmpw(flag, dest_current_value, compare_value);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(flag, failed);
  } else {
    bne(flag, failed);
  }
  // branch to done  => (flag == ne), (dest_current_value != compare_value)
  // fall through    => (flag == eq), (dest_current_value == compare_value)

  stwcx_(exchange_value, addr_base);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
  } else {
    bne(CCR0, retry); // StXcx_ sets CCR0.
  }
  // fall through    => (flag == eq), (dest_current_value == compare_value), (swapped)

  // Result in register (must do this at the end because int_flag_success can be the
  // same register as one above).
  if (use_result_reg) {
    li(int_flag_success, 1);
  }

  if (semantics & MemBarFenceAfter) {
    fence();
  } else if (semantics & MemBarAcq) {
    isync();
  }

  if (use_result_reg && !preset_result_reg) {
    b(done);
  }

  bind(failed);
  if (use_result_reg && !preset_result_reg) {
    li(int_flag_success, 0);
  }

  bind(done);
  // (flag == ne) => (dest_current_value != compare_value), (!swapped)
  // (flag == eq) => (dest_current_value == compare_value), ( swapped)
}
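// Pseudocode sketch of the emulation loop above:
//   retry: current = lwarx(addr);                    // load and reserve
//          if (current != compare) goto failed;
//          if (!stwcx_(exchange, addr)) goto retry;  // store conditional
// with an optional release() before and isync()/fence() after, as selected
// by `semantics'.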
// Performs atomic compare exchange:
//   if (compare_value == *addr_base)
//     *addr_base = exchange_value
//     int_flag_success = 1;
//   else
//     int_flag_success = 0;
//
// ConditionRegister flag       = cmp(compare_value, *addr_base)
// Register dest_current_value  = *addr_base
// Register compare_value       Used to compare with value in memory
// Register exchange_value      Written to memory if compare_value == *addr_base
// Register addr_base           The memory location to compareXChange
// Register int_flag_success    Set to 1 if exchange_value was written to *addr_base
//
// To avoid the costly compare exchange the value is tested beforehand.
// Several special cases exist to avoid that unnecessary information is generated.
//
void MacroAssembler::cmpxchgd(ConditionRegister flag,
                              Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                              Register addr_base, int semantics, bool cmpxchgx_hint,
                              Register int_flag_success, Label* failed_ext, bool contention_hint) {
  Label retry;
  Label failed_int;
  Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
  Label done;

  // Save one branch if result is returned via register and result register is different from the other ones.
  bool use_result_reg    = (int_flag_success != noreg);
  bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value.register_or_noreg() &&
                            int_flag_success != exchange_value && int_flag_success != addr_base);
  assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");

  // release/fence semantics
  if (semantics & MemBarRel) {
    release();
  }

  if (use_result_reg && preset_result_reg) {
    li(int_flag_success, 0); // preset (assume cas failed)
  }

  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
  if (contention_hint) { // Don't try to reserve if cmp fails.
    ld(dest_current_value, 0, addr_base);
    cmpd(flag, compare_value, dest_current_value);
    bne(flag, failed);
  }

  // atomic emulation loop
  bind(retry);

  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
  cmpd(flag, compare_value, dest_current_value);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(flag, failed);
  } else {
    bne(flag, failed);
  }

  stdcx_(exchange_value, addr_base);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
  } else {
    bne(CCR0, retry); // stXcx_ sets CCR0
  }

  // result in register (must do this at the end because int_flag_success can be the same register as one above)
  if (use_result_reg) {
    li(int_flag_success, 1);
  }

  // POWER6 doesn't need isync in CAS.
  // Always emit isync to be on the safe side.
  if (semantics & MemBarFenceAfter) {
    fence();
  } else if (semantics & MemBarAcq) {
    isync();
  }

  if (use_result_reg && !preset_result_reg) {
    b(done);
  }

  bind(failed_int);
  if (use_result_reg && !preset_result_reg) {
    li(int_flag_success, 0);
  }

  bind(done);
  // (flag == ne) => (dest_current_value != compare_value), (!swapped)
  // (flag == eq) => (dest_current_value == compare_value), ( swapped)
}
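// Sketch: with contention_hint the value is compared once with a plain ld
// before the first ldarx, so a heavily contended CAS does not take a
// reservation it is about to lose -- the "simple guard" recommended by IBM
// in the comments above.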
1591 cmpd(CCR0, method_result, intf_klass); 1592 1593 if (peel) { 1594 beq(CCR0, found_method); 1595 } else { 1596 bne(CCR0, search); 1597 // (invert the test to fall through to found_method...) 1598 } 1599 1600 if (!peel) break; 1601 1602 bind(search); 1603 1604 cmpdi(CCR0, method_result, 0); 1605 beq(CCR0, L_no_such_interface); 1606 addi(scan_temp, scan_temp, scan_step); 1607 } 1608 1609 bind(found_method); 1610 1611 // Got a hit. 1612 int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); 1613 lwz(scan_temp, ito_offset, scan_temp); 1614 ldx(method_result, scan_temp, recv_klass); 1615 } 1616 1617 // virtual method calling 1618 void MacroAssembler::lookup_virtual_method(Register recv_klass, 1619 RegisterOrConstant vtable_index, 1620 Register method_result) { 1621 1622 assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg()); 1623 1624 const int base = InstanceKlass::vtable_start_offset() * wordSize; 1625 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 1626 1627 if (vtable_index.is_register()) { 1628 sldi(vtable_index.as_register(), vtable_index.as_register(), LogBytesPerWord); 1629 add(recv_klass, vtable_index.as_register(), recv_klass); 1630 } else { 1631 addi(recv_klass, recv_klass, vtable_index.as_constant() << LogBytesPerWord); 1632 } 1633 ld(R19_method, base + vtableEntry::method_offset_in_bytes(), recv_klass); 1634 } 1635 1636 /////////////////////////////////////////// subtype checking //////////////////////////////////////////// 1637 1638 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 1639 Register super_klass, 1640 Register temp1_reg, 1641 Register temp2_reg, 1642 Label& L_success, 1643 Label& L_failure) { 1644 1645 const Register check_cache_offset = temp1_reg; 1646 const Register cached_super = temp2_reg; 1647 1648 assert_different_registers(sub_klass, super_klass, check_cache_offset, cached_super); 1649 1650 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 1651 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 1652 1653 // If the pointers are equal, we are done (e.g., String[] elements). 1654 // This self-check enables sharing of secondary supertype arrays among 1655 // non-primary types such as array-of-interface. Otherwise, each such 1656 // type would need its own customized SSA. 1657 // We move this check to the front of the fast path because many 1658 // type checks are in fact trivially successful in this manner, 1659 // so we get a nicely predicted branch right at the start of the check. 1660 cmpd(CCR0, sub_klass, super_klass); 1661 beq(CCR0, L_success); 1662 1663 // Check the supertype display: 1664 lwz(check_cache_offset, sco_offset, super_klass); 1665 // The loaded value is the offset from KlassOopDesc. 1666 1667 ldx(cached_super, check_cache_offset, sub_klass); 1668 cmpd(CCR0, cached_super, super_klass); 1669 beq(CCR0, L_success); 1670 1671 // This check has worked decisively for primary supers. 1672 // Secondary supers are sought in the super_cache ('super_cache_addr'). 1673 // (Secondary supers are interfaces and very deeply nested subtypes.) 1674 // This works in the same check above because of a tricky aliasing 1675 // between the super_cache and the primary super display elements. 1676 // (The 'super_check_addr' can address either, as the case requires.) 1677 // Note that the cache is updated below if it does not help us find 1678 // what we need immediately. 1679 // So if it was a primary super, we can just fail immediately. 
1680 // Otherwise, it's the slow path for us (no success at this point). 1681 1682 cmpwi(CCR0, check_cache_offset, sc_offset); 1683 bne(CCR0, L_failure); 1684 // bind(slow_path); // fallthru 1685 } 1686 1687 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 1688 Register super_klass, 1689 Register temp1_reg, 1690 Register temp2_reg, 1691 Label* L_success, 1692 Register result_reg) { 1693 const Register array_ptr = temp1_reg; // current value from cache array 1694 const Register temp = temp2_reg; 1695 1696 assert_different_registers(sub_klass, super_klass, array_ptr, temp); 1697 1698 int source_offset = in_bytes(Klass::secondary_supers_offset()); 1699 int target_offset = in_bytes(Klass::secondary_super_cache_offset()); 1700 1701 int length_offset = Array<Klass*>::length_offset_in_bytes(); 1702 int base_offset = Array<Klass*>::base_offset_in_bytes(); 1703 1704 Label hit, loop, failure, fallthru; 1705 1706 ld(array_ptr, source_offset, sub_klass); 1707 1708 //assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated."); 1709 lwz(temp, length_offset, array_ptr); 1710 cmpwi(CCR0, temp, 0); 1711 beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0 1712 1713 mtctr(temp); // load ctr 1714 1715 bind(loop); 1716 // Oops in table are NO MORE compressed. 1717 ld(temp, base_offset, array_ptr); 1718 cmpd(CCR0, temp, super_klass); 1719 beq(CCR0, hit); 1720 addi(array_ptr, array_ptr, BytesPerWord); 1721 bdnz(loop); 1722 1723 bind(failure); 1724 if (result_reg!=noreg) li(result_reg, 1); // load non-zero result (indicates a miss) 1725 b(fallthru); 1726 1727 bind(hit); 1728 std(super_klass, target_offset, sub_klass); // save result to cache 1729 if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit) 1730 if (L_success != NULL) b(*L_success); 1731 1732 bind(fallthru); 1733 } 1734 1735 // Try fast path, then go to slow one if not successful 1736 void MacroAssembler::check_klass_subtype(Register sub_klass, 1737 Register super_klass, 1738 Register temp1_reg, 1739 Register temp2_reg, 1740 Label& L_success) { 1741 Label L_failure; 1742 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure); 1743 check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success); 1744 bind(L_failure); // Fallthru if not successful. 1745 } 1746 1747 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 1748 Register temp_reg, 1749 Label& wrong_method_type) { 1750 assert_different_registers(mtype_reg, mh_reg, temp_reg); 1751 // Compare method type against that of the receiver. 1752 load_heap_oop_not_null(temp_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg), mh_reg); 1753 cmpd(CCR0, temp_reg, mtype_reg); 1754 bne(CCR0, wrong_method_type); 1755 } 1756 1757 RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, 1758 Register temp_reg, 1759 int extra_slot_offset) { 1760 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
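// A rough sketch of the value computed below (illustrative only, not emitted
// code; names mirror the parameters of argument_offset):
//   byte_offset = (arg_slot + extra_slot_offset) * Interpreter::stackElementSize
// For a constant arg_slot the result folds into an immediate; for a register
// arg_slot it is computed into temp_reg with a shift and an optional addi.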
1761 int stackElementSize = Interpreter::stackElementSize; 1762 int offset = extra_slot_offset * stackElementSize; 1763 if (arg_slot.is_constant()) { 1764 offset += arg_slot.as_constant() * stackElementSize; 1765 return offset; 1766 } else { 1767 assert(temp_reg != noreg, "must specify"); 1768 sldi(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); 1769 if (offset != 0) 1770 addi(temp_reg, temp_reg, offset); 1771 return temp_reg; 1772 } 1773 } 1774 1775 void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, 1776 Register mark_reg, Register temp_reg, 1777 Register temp2_reg, Label& done, Label* slow_case) { 1778 assert(UseBiasedLocking, "why call this otherwise?"); 1779 1780 #ifdef ASSERT 1781 assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); 1782 #endif 1783 1784 Label cas_label; 1785 1786 // Branch to done if fast path fails and no slow_case provided. 1787 Label *slow_case_int = (slow_case != NULL) ? slow_case : &done; 1788 1789 // Biased locking 1790 // See whether the lock is currently biased toward our thread and 1791 // whether the epoch is still valid 1792 // Note that the runtime guarantees sufficient alignment of JavaThread 1793 // pointers to allow age to be placed into low bits 1794 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, 1795 "biased locking makes assumptions about bit layout"); 1796 1797 if (PrintBiasedLockingStatistics) { 1798 load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg); 1799 lwz(temp2_reg, 0, temp_reg); 1800 addi(temp2_reg, temp2_reg, 1); 1801 stw(temp2_reg, 0, temp_reg); 1802 } 1803 1804 andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place); 1805 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); 1806 bne(cr_reg, cas_label); 1807 1808 load_klass(temp_reg, obj_reg); 1809 1810 load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place)); 1811 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); 1812 orr(temp_reg, R16_thread, temp_reg); 1813 xorr(temp_reg, mark_reg, temp_reg); 1814 andr(temp_reg, temp_reg, temp2_reg); 1815 cmpdi(cr_reg, temp_reg, 0); 1816 if (PrintBiasedLockingStatistics) { 1817 Label l; 1818 bne(cr_reg, l); 1819 load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr()); 1820 lwz(temp2_reg, 0, mark_reg); 1821 addi(temp2_reg, temp2_reg, 1); 1822 stw(temp2_reg, 0, mark_reg); 1823 // restore mark_reg 1824 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); 1825 bind(l); 1826 } 1827 beq(cr_reg, done); 1828 1829 Label try_revoke_bias; 1830 Label try_rebias; 1831 1832 // At this point we know that the header has the bias pattern and 1833 // that we are not the bias owner in the current epoch. We need to 1834 // figure out more details about the state of the header in order to 1835 // know what operations can be legally performed on the object's 1836 // header. 1837 1838 // If the low three bits in the xor result aren't clear, that means 1839 // the prototype header is no longer biased and we have to revoke 1840 // the bias on this object. 1841 andi(temp2_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); 1842 cmpwi(cr_reg, temp2_reg, 0); 1843 bne(cr_reg, try_revoke_bias); 1844 1845 // Biasing is still enabled for this data type. See whether the 1846 // epoch of the current bias is still valid, meaning that the epoch 1847 // bits of the mark word are equal to the epoch bits of the 1848 // prototype header. 
(Note that the prototype header's epoch bits 1849 // only change at a safepoint.) If not, attempt to rebias the object 1850 // toward the current thread. Note that we must be absolutely sure 1851 // that the current epoch is invalid in order to do this because 1852 // otherwise the manipulations it performs on the mark word are 1853 // illegal. 1854 1855 int shift_amount = 64 - markOopDesc::epoch_shift; 1856 // rotate epoch bits to right (little) end and set other bits to 0 1857 // [ big part | epoch | little part ] -> [ 0..0 | epoch ] 1858 rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits); 1859 // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented 1860 bne(CCR0, try_rebias); 1861 1862 // The epoch of the current bias is still valid but we know nothing 1863 // about the owner; it might be set or it might be clear. Try to 1864 // acquire the bias of the object using an atomic operation. If this 1865 // fails we will go in to the runtime to revoke the object's bias. 1866 // Note that we first construct the presumed unbiased header so we 1867 // don't accidentally blow away another thread's valid bias. 1868 andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place | 1869 markOopDesc::age_mask_in_place | 1870 markOopDesc::epoch_mask_in_place)); 1871 orr(temp_reg, R16_thread, mark_reg); 1872 1873 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); 1874 1875 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). 1876 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, 1877 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, 1878 /*where=*/obj_reg, 1879 MacroAssembler::MemBarAcq, 1880 MacroAssembler::cmpxchgx_hint_acquire_lock(), 1881 noreg, slow_case_int); // bail out if failed 1882 1883 // If the biasing toward our thread failed, this means that 1884 // another thread succeeded in biasing it toward itself and we 1885 // need to revoke that bias. The revocation will occur in the 1886 // interpreter runtime in the slow case. 1887 if (PrintBiasedLockingStatistics) { 1888 load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg); 1889 lwz(temp2_reg, 0, temp_reg); 1890 addi(temp2_reg, temp2_reg, 1); 1891 stw(temp2_reg, 0, temp_reg); 1892 } 1893 b(done); 1894 1895 bind(try_rebias); 1896 // At this point we know the epoch has expired, meaning that the 1897 // current "bias owner", if any, is actually invalid. Under these 1898 // circumstances _only_, we are allowed to use the current header's 1899 // value as the comparison value when doing the cas to acquire the 1900 // bias in the current epoch. In other words, we allow transfer of 1901 // the bias from one thread to another directly in this situation. 1902 andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place); 1903 orr(temp_reg, R16_thread, temp_reg); 1904 load_klass(temp2_reg, obj_reg); 1905 ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg); 1906 orr(temp_reg, temp_reg, temp2_reg); 1907 1908 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); 1909 1910 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). 
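  // A sketch of the intended semantics of the cmpxchgd call below
  // (illustrative pseudocode, not emitted code):
  //   if (obj->mark() == mark_reg) { obj->set_mark(temp_reg); /* cr_reg: eq */ }
  //   else { temp2_reg = obj->mark(); /* cr_reg: ne */ goto *slow_case_int; }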
1911 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, 1912 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, 1913 /*where=*/obj_reg, 1914 MacroAssembler::MemBarAcq, 1915 MacroAssembler::cmpxchgx_hint_acquire_lock(), 1916 noreg, slow_case_int); // bail out if failed 1917 1918 // If the biasing toward our thread failed, this means that 1919 // another thread succeeded in biasing it toward itself and we 1920 // need to revoke that bias. The revocation will occur in the 1921 // interpreter runtime in the slow case. 1922 if (PrintBiasedLockingStatistics) { 1923 load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg); 1924 lwz(temp2_reg, 0, temp_reg); 1925 addi(temp2_reg, temp2_reg, 1); 1926 stw(temp2_reg, 0, temp_reg); 1927 } 1928 b(done); 1929 1930 bind(try_revoke_bias); 1931 // The prototype mark in the klass doesn't have the bias bit set any 1932 // more, indicating that objects of this data type are not supposed 1933 // to be biased any more. We are going to try to reset the mark of 1934 // this object to the prototype value and fall through to the 1935 // CAS-based locking scheme. Note that if our CAS fails, it means 1936 // that another thread raced us for the privilege of revoking the 1937 // bias of this particular object, so it's okay to continue in the 1938 // normal locking code. 1939 load_klass(temp_reg, obj_reg); 1940 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); 1941 andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place); 1942 orr(temp_reg, temp_reg, temp2_reg); 1943 1944 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); 1945 1946 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). 1947 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, 1948 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, 1949 /*where=*/obj_reg, 1950 MacroAssembler::MemBarAcq, 1951 MacroAssembler::cmpxchgx_hint_acquire_lock()); 1952 1953 // reload markOop in mark_reg before continuing with lightweight locking 1954 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); 1955 1956 // Fall through to the normal CAS-based lock, because no matter what 1957 // the result of the above CAS, some thread must have succeeded in 1958 // removing the bias bit from the object's header. 1959 if (PrintBiasedLockingStatistics) { 1960 Label l; 1961 bne(cr_reg, l); 1962 load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg); 1963 lwz(temp2_reg, 0, temp_reg); 1964 addi(temp2_reg, temp2_reg, 1); 1965 stw(temp2_reg, 0, temp_reg); 1966 bind(l); 1967 } 1968 1969 bind(cas_label); 1970 } 1971 1972 void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) { 1973 // Check for biased locking unlock case, which is a no-op 1974 // Note: we do not have to check the thread ID for two reasons. 1975 // First, the interpreter checks for IllegalMonitorStateException at 1976 // a higher level. Second, if the bias was revoked while we held the 1977 // lock, the object could not be rebiased toward another thread, so 1978 // the bias bit would be clear. 1979 1980 ld(temp_reg, 0, mark_addr); 1981 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); 1982 1983 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); 1984 beq(cr_reg, done); 1985 } 1986 1987 // TM on PPC64. 
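// The atomic helpers below use the classic PPC load-reserve/store-conditional
// idiom. A minimal sketch of the emitted loop's semantics (illustrative
// pseudocode only, not part of this file's interface):
//   do {
//     result = *addr;                             // ldarx/lwarx: load and reserve
//     result = result OP operand;                 // addi / ori
//   } while (!store_conditional(addr, result));   // stdcx_/stwcx_ sets CCR0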
1988 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) { 1989 Label retry; 1990 bind(retry); 1991 ldarx(result, addr, /*hint*/ false); 1992 addi(result, result, simm16); 1993 stdcx_(result, addr); 1994 if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 1995 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 1996 } else { 1997 bne( CCR0, retry); // stXcx_ sets CCR0 1998 } 1999 } 2000 2001 void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) { 2002 Label retry; 2003 bind(retry); 2004 lwarx(result, addr, /*hint*/ false); 2005 ori(result, result, uimm16); 2006 stwcx_(result, addr); 2007 if (UseStaticBranchPredictionInCompareAndSwapPPC64) { 2008 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 2009 } else { 2010 bne( CCR0, retry); // stXcx_ sets CCR0 2011 } 2012 } 2013 2014 #if INCLUDE_RTM_OPT 2015 2016 // Update rtm_counters based on abort status 2017 // input: abort_status 2018 // rtm_counters (RTMLockingCounters*) 2019 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) { 2020 // Mapping to keep PreciseRTMLockingStatistics similar to x86. 2021 // x86 ppc (! means inverted, ? means not the same) 2022 // 0 31 Set if abort caused by XABORT instruction. 2023 // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set. 2024 // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted. 2025 // 3 10 Set if an internal buffer overflowed. 2026 // 4 ?12 Set if a debug breakpoint was hit. 2027 // 5 ?32 Set if an abort occurred during execution of a nested transaction. 2028 const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too. 2029 Assembler::tm_failure_persistent, // inverted: transient 2030 Assembler::tm_trans_cf, 2031 Assembler::tm_footprint_of, 2032 Assembler::tm_non_trans_cf, 2033 Assembler::tm_suspended}; 2034 const bool tm_failure_inv[] = {false, true, false, false, false, false}; 2035 assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!"); 2036 2037 const Register addr_Reg = R0; 2038 // Keep track of offset to where rtm_counters_Reg had pointed to. 
2039 int counters_offs = RTMLockingCounters::abort_count_offset(); 2040 addi(addr_Reg, rtm_counters_Reg, counters_offs); 2041 const Register temp_Reg = rtm_counters_Reg; 2042 2043 //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically 2044 ldx(temp_Reg, addr_Reg); 2045 addi(temp_Reg, temp_Reg, 1); 2046 stdx(temp_Reg, addr_Reg); 2047 2048 if (PrintPreciseRTMLockingStatistics) { 2049 int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs; 2050 2051 //mftexasr(abort_status); done by caller 2052 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { 2053 counters_offs += counters_offs_delta; 2054 li(temp_Reg, counters_offs_delta); // can't use addi with R0 2055 add(addr_Reg, addr_Reg, temp_Reg); // point to next counter 2056 counters_offs_delta = sizeof(uintx); 2057 2058 Label check_abort; 2059 rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); 2060 if (tm_failure_inv[i]) { 2061 bne(CCR0, check_abort); 2062 } else { 2063 beq(CCR0, check_abort); 2064 } 2065 //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically 2066 ldx(temp_Reg, addr_Reg); 2067 addi(temp_Reg, temp_Reg, 1); 2068 stdx(temp_Reg, addr_Reg); 2069 bind(check_abort); 2070 } 2071 } 2072 li(temp_Reg, -counters_offs); // can't use addi with R0 2073 add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore 2074 } 2075 2076 // Branch if (random & (count-1) != 0), count is 2^n 2077 // tmp and CR0 are killed 2078 void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) { 2079 mftb(tmp); 2080 andi_(tmp, tmp, count-1); 2081 bne(CCR0, brLabel); 2082 } 2083 2084 // Perform abort ratio calculation, set no_rtm bit if high ratio. 2085 // input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED 2086 void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg, 2087 RTMLockingCounters* rtm_counters, 2088 Metadata* method_data) { 2089 Label L_done, L_check_always_rtm1, L_check_always_rtm2; 2090 2091 if (RTMLockingCalculationDelay > 0) { 2092 // Delay calculation. 2093 ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr()); 2094 cmpdi(CCR0, rtm_counters_Reg, 0); 2095 beq(CCR0, L_done); 2096 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload 2097 } 2098 // Abort ratio calculation only if abort_count > RTMAbortThreshold. 2099 // Aborted transactions = abort_count * 100 2100 // All transactions = total_count * RTMTotalCountIncrRate 2101 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) 2102 ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg); 2103 cmpdi(CCR0, R0, RTMAbortThreshold); 2104 blt(CCR0, L_check_always_rtm2); 2105 mulli(R0, R0, 100); 2106 2107 const Register tmpReg = rtm_counters_Reg; 2108 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); 2109 mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); 2110 mulli(tmpReg, tmpReg, RTMAbortRatio); 2111 cmpd(CCR0, R0, tmpReg); 2112 blt(CCR0, L_check_always_rtm1); // jump to reload 2113 if (method_data != NULL) { 2114 // Set rtm_state to "no rtm" in MDO. 2115 // Not using a metadata relocation. Method and Class Loader are kept alive anyway. 2116 // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.) 
2117 load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); 2118 atomic_ori_int(R0, tmpReg, NoRTM); 2119 } 2120 b(L_done); 2121 2122 bind(L_check_always_rtm1); 2123 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload 2124 bind(L_check_always_rtm2); 2125 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); 2126 cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); 2127 blt(CCR0, L_done); 2128 if (method_data != NULL) { 2129 // Set rtm_state to "always rtm" in MDO. 2130 // Not using a metadata relocation. See above. 2131 load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); 2132 atomic_ori_int(R0, tmpReg, UseRTM); 2133 } 2134 bind(L_done); 2135 } 2136 2137 // Update counters and perform abort ratio calculation. 2138 // input: abort_status_Reg 2139 void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg, 2140 RTMLockingCounters* rtm_counters, 2141 Metadata* method_data, 2142 bool profile_rtm) { 2143 2144 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); 2145 // Update rtm counters based on state at abort. 2146 // Reads abort_status_Reg, updates flags. 2147 assert_different_registers(abort_status_Reg, temp_Reg); 2148 load_const_optimized(temp_Reg, (address)rtm_counters, R0); 2149 rtm_counters_update(abort_status_Reg, temp_Reg); 2150 if (profile_rtm) { 2151 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); 2152 rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data); 2153 } 2154 } 2155 2156 // Retry on abort if abort's status indicates non-persistent failure. 2157 // inputs: retry_count_Reg 2158 // : abort_status_Reg 2159 // output: retry_count_Reg decremented by 1 2160 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, 2161 Label& retryLabel, Label* checkRetry) { 2162 Label doneRetry; 2163 rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0); 2164 bne(CCR0, doneRetry); 2165 if (checkRetry) { bind(*checkRetry); } 2166 addic_(retry_count_Reg, retry_count_Reg, -1); 2167 blt(CCR0, doneRetry); 2168 smt_yield(); // Can't use wait(). No permission (SIGILL). 2169 b(retryLabel); 2170 bind(doneRetry); 2171 } 2172 2173 // Spin and retry if lock is busy. 2174 // inputs: box_Reg (monitor address) 2175 // : retry_count_Reg 2176 // output: retry_count_Reg decremented by 1 2177 // CTR is killed 2178 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) { 2179 Label SpinLoop, doneRetry; 2180 addic_(retry_count_Reg, retry_count_Reg, -1); 2181 blt(CCR0, doneRetry); 2182 li(R0, RTMSpinLoopCount); 2183 mtctr(R0); 2184 2185 bind(SpinLoop); 2186 smt_yield(); // Can't use waitrsv(). No permission (SIGILL). 2187 bdz(retryLabel); 2188 ld(R0, 0, owner_addr_Reg); 2189 cmpdi(CCR0, R0, 0); 2190 bne(CCR0, SpinLoop); 2191 b(retryLabel); 2192 2193 bind(doneRetry); 2194 } 2195 2196 // Use RTM for normal stack locks. 
2197 // Input: objReg (object to lock) 2198 void MacroAssembler::rtm_stack_locking(ConditionRegister flag, 2199 Register obj, Register mark_word, Register tmp, 2200 Register retry_on_abort_count_Reg, 2201 RTMLockingCounters* stack_rtm_counters, 2202 Metadata* method_data, bool profile_rtm, 2203 Label& DONE_LABEL, Label& IsInflated) { 2204 assert(UseRTMForStackLocks, "why call this otherwise?"); 2205 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); 2206 Label L_rtm_retry, L_decrement_retry, L_on_abort; 2207 2208 if (RTMRetryCount > 0) { 2209 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort 2210 bind(L_rtm_retry); 2211 } 2212 andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased 2213 bne(CCR0, IsInflated); 2214 2215 if (PrintPreciseRTMLockingStatistics || profile_rtm) { 2216 Label L_noincrement; 2217 if (RTMTotalCountIncrRate > 1) { 2218 branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement); 2219 } 2220 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); 2221 load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0); 2222 //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically 2223 ldx(mark_word, tmp); 2224 addi(mark_word, mark_word, 1); 2225 stdx(mark_word, tmp); 2226 bind(L_noincrement); 2227 } 2228 tbegin_(); 2229 beq(CCR0, L_on_abort); 2230 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked. 2231 andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits 2232 cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked 2233 beq(flag, DONE_LABEL); // all done if unlocked 2234 2235 if (UseRTMXendForLockBusy) { 2236 tend_(); 2237 b(L_decrement_retry); 2238 } else { 2239 tabort_(); 2240 } 2241 bind(L_on_abort); 2242 const Register abort_status_Reg = tmp; 2243 mftexasr(abort_status_Reg); 2244 if (PrintPreciseRTMLockingStatistics || profile_rtm) { 2245 rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm); 2246 } 2247 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload 2248 if (RTMRetryCount > 0) { 2249 // Retry on lock abort if abort status is not permanent. 2250 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry); 2251 } else { 2252 bind(L_decrement_retry); 2253 } 2254 } 2255 2256 // Use RTM for inflating locks 2257 // inputs: obj (object to lock) 2258 // mark_word (current header - KILLED) 2259 // boxReg (on-stack box address (displaced header location) - KILLED) 2260 void MacroAssembler::rtm_inflated_locking(ConditionRegister flag, 2261 Register obj, Register mark_word, Register boxReg, 2262 Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg, 2263 RTMLockingCounters* rtm_counters, 2264 Metadata* method_data, bool profile_rtm, 2265 Label& DONE_LABEL) { 2266 assert(UseRTMLocking, "why call this otherwise?"); 2267 Label L_rtm_retry, L_decrement_retry, L_on_abort; 2268 // Clean monitor_value bit to get valid pointer. 2269 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; 2270 2271 // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark(). 
2272 std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg); 2273 const Register tmpReg = boxReg; 2274 const Register owner_addr_Reg = mark_word; 2275 addi(owner_addr_Reg, mark_word, owner_offset); 2276 2277 if (RTMRetryCount > 0) { 2278 load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy. 2279 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort. 2280 bind(L_rtm_retry); 2281 } 2282 if (PrintPreciseRTMLockingStatistics || profile_rtm) { 2283 Label L_noincrement; 2284 if (RTMTotalCountIncrRate > 1) { 2285 branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement); 2286 } 2287 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); 2288 load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg); 2289 //atomic_inc_ptr(R0, tmpReg); We don't increment atomically 2290 ldx(tmpReg, R0); 2291 addi(tmpReg, tmpReg, 1); 2292 stdx(tmpReg, R0); 2293 bind(L_noincrement); 2294 } 2295 tbegin_(); 2296 beq(CCR0, L_on_abort); 2297 // We don't reload mark word. Will only be reset at safepoint. 2298 ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked. 2299 cmpdi(flag, R0, 0); 2300 beq(flag, DONE_LABEL); 2301 2302 if (UseRTMXendForLockBusy) { 2303 tend_(); 2304 b(L_decrement_retry); 2305 } else { 2306 tabort_(); 2307 } 2308 bind(L_on_abort); 2309 const Register abort_status_Reg = tmpReg; 2310 mftexasr(abort_status_Reg); 2311 if (PrintPreciseRTMLockingStatistics || profile_rtm) { 2312 rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm); 2313 // Restore owner_addr_Reg 2314 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); 2315 #ifdef ASSERT 2316 andi_(R0, mark_word, markOopDesc::monitor_value); 2317 asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint. 2318 #endif 2319 addi(owner_addr_Reg, mark_word, owner_offset); 2320 } 2321 if (RTMRetryCount > 0) { 2322 // Retry on lock abort if abort status is not permanent. 2323 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); 2324 } 2325 2326 // Appears unlocked - try to swing _owner from null to non-null. 2327 cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg, 2328 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, 2329 MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true); 2330 2331 if (RTMRetryCount > 0) { 2332 // success done else retry 2333 b(DONE_LABEL); 2334 bind(L_decrement_retry); 2335 // Spin and retry if lock is busy. 2336 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry); 2337 } else { 2338 bind(L_decrement_retry); 2339 } 2340 } 2341 2342 #endif // INCLUDE_RTM_OPT 2343 2344 // "The box" is the space on the stack where we copy the object mark. 2345 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, 2346 Register temp, Register displaced_header, Register current_header, 2347 bool try_bias, 2348 RTMLockingCounters* rtm_counters, 2349 RTMLockingCounters* stack_rtm_counters, 2350 Metadata* method_data, 2351 bool use_rtm, bool profile_rtm) { 2352 assert_different_registers(oop, box, temp, displaced_header, current_header); 2353 assert(flag != CCR0, "bad condition register"); 2354 Label cont; 2355 Label object_has_monitor; 2356 Label cas_failed; 2357 2358 // Load markOop from object into displaced_header. 
2359 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop); 2360 2361 2362 // Always do locking in runtime. 2363 if (EmitSync & 0x01) { 2364 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false. 2365 return; 2366 } 2367 2368 if (try_bias) { 2369 biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); 2370 } 2371 2372 #if INCLUDE_RTM_OPT 2373 if (UseRTMForStackLocks && use_rtm) { 2374 rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header, 2375 stack_rtm_counters, method_data, profile_rtm, 2376 cont, object_has_monitor); 2377 } 2378 #endif // INCLUDE_RTM_OPT 2379 2380 // Handle existing monitor. 2381 if ((EmitSync & 0x02) == 0) { 2382 // The object has an existing monitor iff (mark & monitor_value) != 0. 2383 andi_(temp, displaced_header, markOopDesc::monitor_value); 2384 bne(CCR0, object_has_monitor); 2385 } 2386 2387 // Set displaced_header to be (markOop of object | UNLOCK_VALUE). 2388 ori(displaced_header, displaced_header, markOopDesc::unlocked_value); 2389 2390 // Load Compare Value application register. 2391 2392 // Initialize the box. (Must happen before we update the object mark!) 2393 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); 2394 2395 // Must fence, otherwise, preceding store(s) may float below cmpxchg. 2396 // Compare object markOop with mark and if equal exchange scratch1 with object markOop. 2397 // CmpxchgX sets cr_reg to cmpX(current, displaced). 2398 membar(Assembler::StoreStore); 2399 cmpxchgd(/*flag=*/flag, 2400 /*current_value=*/current_header, 2401 /*compare_value=*/displaced_header, 2402 /*exchange_value=*/box, 2403 /*where=*/oop, 2404 MacroAssembler::MemBarAcq, 2405 MacroAssembler::cmpxchgx_hint_acquire_lock(), 2406 noreg, 2407 &cas_failed); 2408 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); 2409 2410 // If the compare-and-exchange succeeded, then we found an unlocked 2411 // object and we have now locked it. 2412 b(cont); 2413 2414 bind(cas_failed); 2415 // We did not see an unlocked object so try the fast recursive case. 2416 2417 // Check if the owner is self by comparing the value in the markOop of object 2418 // (current_header) with the stack pointer. 2419 sub(current_header, current_header, R1_SP); 2420 load_const_optimized(temp, (address) (~(os::vm_page_size()-1) | 2421 markOopDesc::lock_mask_in_place)); 2422 2423 and_(R0/*==0?*/, current_header, temp); 2424 // If condition is true we are cont and hence we can store 0 as the 2425 // displaced header in the box, which indicates that it is a recursive lock. 2426 mcrf(flag,CCR0); 2427 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box); 2428 2429 // Handle existing monitor. 2430 if ((EmitSync & 0x02) == 0) { 2431 b(cont); 2432 2433 bind(object_has_monitor); 2434 // The object's monitor m is unlocked iff m->owner == NULL, 2435 // otherwise m->owner may contain a thread or a stack address. 2436 2437 #if INCLUDE_RTM_OPT 2438 // Use the same RTM locking code in 32- and 64-bit VM. 2439 if (use_rtm) { 2440 rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header, 2441 rtm_counters, method_data, profile_rtm, cont); 2442 } else { 2443 #endif // INCLUDE_RTM_OPT 2444 2445 // Try to CAS m->owner from NULL to current thread. 2446 addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value); 2447 li(displaced_header, 0); 2448 // CmpxchgX sets flag to cmpX(current, displaced). 
    cmpxchgd(/*flag=*/flag,
             /*current_value=*/current_header,
             /*compare_value=*/(intptr_t)0,
             /*exchange_value=*/R16_thread,
             /*where=*/temp,
             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
             MacroAssembler::cmpxchgx_hint_acquire_lock());

    // Store a non-null value into the box.
    std(box, BasicLock::displaced_header_offset_in_bytes(), box);

#   ifdef ASSERT
    bne(flag, cont);
    // We have acquired the monitor, check some invariants.
    addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
    // Invariant 1: _recursions should be 0.
    //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
    asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
                            "monitor->_recursions should be 0", -1);
    // Invariant 2: OwnerIsThread shouldn't be 0.
    //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
    //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
    //                           "monitor->OwnerIsThread shouldn't be 0", -1);
#   endif

#if INCLUDE_RTM_OPT
    } // use_rtm()
#endif
  }

  bind(cont);
  // flag == EQ indicates success
  // flag == NE indicates failure
}

void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                                 Register temp, Register displaced_header, Register current_header,
                                                 bool try_bias, bool use_rtm) {
  assert_different_registers(oop, box, temp, displaced_header, current_header);
  assert(flag != CCR0, "bad condition register");
  Label cont;
  Label object_has_monitor;

  // Always do locking in runtime.
  if (EmitSync & 0x01) {
    cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
    return;
  }

  if (try_bias) {
    biased_locking_exit(flag, oop, current_header, cont);
  }

#if INCLUDE_RTM_OPT
  if (UseRTMForStackLocks && use_rtm) {
    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
    Label L_regular_unlock;
    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);          // fetch markword
    andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
    cmpwi(flag, R0, markOopDesc::unlocked_value);                     // bits = 001 unlocked
    bne(flag, L_regular_unlock);                                      // else RegularLock
    tend_();                                                          // otherwise end...
    b(cont);                                                          // ... and we're done
    bind(L_regular_unlock);
  }
#endif

  // Find the lock address and load the displaced header from the stack.
  ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);

  // If the displaced header is 0, we have a recursive unlock.
  cmpdi(flag, displaced_header, 0);
  beq(flag, cont);

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    // The object has an existing monitor iff (mark & monitor_value) != 0.
    RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
    andi_(R0, current_header, markOopDesc::monitor_value);
    bne(CCR0, object_has_monitor);
  }

  // Check if it is still a lightweight lock; this is true if we see
  // the stack address of the basicLock in the markOop of the object.
  // Cmpxchg sets flag to cmpd(current_header, box).
2535 cmpxchgd(/*flag=*/flag, 2536 /*current_value=*/current_header, 2537 /*compare_value=*/box, 2538 /*exchange_value=*/displaced_header, 2539 /*where=*/oop, 2540 MacroAssembler::MemBarRel, 2541 MacroAssembler::cmpxchgx_hint_release_lock(), 2542 noreg, 2543 &cont); 2544 2545 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); 2546 2547 // Handle existing monitor. 2548 if ((EmitSync & 0x02) == 0) { 2549 b(cont); 2550 2551 bind(object_has_monitor); 2552 addi(current_header, current_header, -markOopDesc::monitor_value); // monitor 2553 ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); 2554 2555 // It's inflated. 2556 #if INCLUDE_RTM_OPT 2557 if (use_rtm) { 2558 Label L_regular_inflated_unlock; 2559 // Clean monitor_value bit to get valid pointer 2560 cmpdi(flag, temp, 0); 2561 bne(flag, L_regular_inflated_unlock); 2562 tend_(); 2563 b(cont); 2564 bind(L_regular_inflated_unlock); 2565 } 2566 #endif 2567 2568 ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header); 2569 xorr(temp, R16_thread, temp); // Will be 0 if we are the owner. 2570 orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions. 2571 cmpdi(flag, temp, 0); 2572 bne(flag, cont); 2573 2574 ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header); 2575 ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header); 2576 orr(temp, temp, displaced_header); // Will be 0 if both are 0. 2577 cmpdi(flag, temp, 0); 2578 bne(flag, cont); 2579 release(); 2580 std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); 2581 } 2582 2583 bind(cont); 2584 // flag == EQ indicates success 2585 // flag == NE indicates failure 2586 } 2587 2588 // Write serialization page so VM thread can do a pseudo remote membar. 2589 // We use the current thread pointer to calculate a thread specific 2590 // offset to write to within the page. This minimizes bus traffic 2591 // due to cache line collision. 2592 void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { 2593 srdi(tmp2, thread, os::get_serialize_page_shift_count()); 2594 2595 int mask = os::vm_page_size() - sizeof(int); 2596 if (Assembler::is_simm(mask, 16)) { 2597 andi(tmp2, tmp2, mask); 2598 } else { 2599 lis(tmp1, (int)((signed short) (mask >> 16))); 2600 ori(tmp1, tmp1, mask & 0x0000ffff); 2601 andr(tmp2, tmp2, tmp1); 2602 } 2603 2604 load_const(tmp1, (long) os::get_memory_serialize_page()); 2605 release(); 2606 stwx(R0, tmp1, tmp2); 2607 } 2608 2609 2610 // GC barrier helper macros 2611 2612 // Write the card table byte if needed. 2613 void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) { 2614 CardTableModRefBS* bs = 2615 barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set()); 2616 assert(bs->kind() == BarrierSet::CardTableModRef || 2617 bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); 2618 #ifdef ASSERT 2619 cmpdi(CCR0, Rnew_val, 0); 2620 asm_assert_ne("null oop not allowed", 0x321); 2621 #endif 2622 card_table_write(bs->byte_map_base, Rtmp, Rstore_addr); 2623 } 2624 2625 // Write the card table byte. 
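// Illustrative effect of the code below (a sketch, not emitted code):
//   byte_map_base[(uintptr_t)Robj >> CardTableModRefBS::card_shift] = 0; // 0 == dirty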
void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj) {
  assert_different_registers(Robj, Rtmp, R0);
  load_const_optimized(Rtmp, (address)byte_map_base, R0);
  srdi(Robj, Robj, CardTableModRefBS::card_shift);
  li(R0, 0); // dirty
  if (UseConcMarkSweepGC) membar(Assembler::StoreStore);
  stbx(R0, Rtmp, Robj);
}

#if INCLUDE_ALL_GCS
// General G1 pre-barrier generator.
// Goal: record the previous value if it is not null.
void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
                                          Register Rtmp1, Register Rtmp2, bool needs_frame) {
  Label runtime, filtered;

  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
  } else {
    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread);
  }
  cmpdi(CCR0, Rtmp1, 0);
  beq(CCR0, filtered);

  // Do we need to load the previous value?
  if (Robj != noreg) {
    // Load the previous value...
    if (UseCompressedOops) {
      lwz(Rpre_val, offset, Robj);
    } else {
      ld(Rpre_val, offset, Robj);
    }
    // Previous value has been loaded into Rpre_val.
  }
  assert(Rpre_val != noreg, "must have a real register");

  // Is the previous value null?
  cmpdi(CCR0, Rpre_val, 0);
  beq(CCR0, filtered);

  if (Robj != noreg && UseCompressedOops) {
    decode_heap_oop_not_null(Rpre_val);
  }

  // OK, it's not filtered, so we'll need to call enqueue. If the previous
  // value was preloaded into a volatile register, it is saved across the
  // runtime call below (see the R31 spill); otherwise a frame is built
  // first so the call can be made safely.

  // Can we store the original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)
  const Register Rbuffer = Rtmp1, Rindex = Rtmp2;

  ld(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);
  cmpdi(CCR0, Rindex, 0);
  beq(CCR0, runtime); // If index == 0, goto runtime.
  ld(Rbuffer, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread);

  addi(Rindex, Rindex, -wordSize); // Decrement index.
  std(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread);

  // Record the previous value.
  stdx(Rpre_val, Rbuffer, Rindex);
  b(filtered);

  bind(runtime);

  // The VM call needs a frame so that volatile registers can be saved.
  if (needs_frame) {
    save_LR_CR(Rtmp1);
    push_frame_reg_args(0, Rtmp2);
  }

  if (Rpre_val->is_volatile() && Robj == noreg) mr(R31, Rpre_val); // Save pre_val across C call if it was preloaded.
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, R16_thread);
  if (Rpre_val->is_volatile() && Robj == noreg) mr(Rpre_val, R31); // restore

  if (needs_frame) {
    pop_frame();
    restore_LR_CR(Rtmp1);
  }

  bind(filtered);
}

// General G1 post-barrier generator.
// Store cross-region card.
2716 void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, Register Rtmp2, Register Rtmp3, Label *filtered_ext) { 2717 Label runtime, filtered_int; 2718 Label& filtered = (filtered_ext != NULL) ? *filtered_ext : filtered_int; 2719 assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); 2720 2721 G1SATBCardTableLoggingModRefBS* bs = 2722 barrier_set_cast<G1SATBCardTableLoggingModRefBS>(Universe::heap()->barrier_set()); 2723 2724 // Does store cross heap regions? 2725 if (G1RSBarrierRegionFilter) { 2726 xorr(Rtmp1, Rstore_addr, Rnew_val); 2727 srdi_(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes); 2728 beq(CCR0, filtered); 2729 } 2730 2731 // Crosses regions, storing NULL? 2732 #ifdef ASSERT 2733 cmpdi(CCR0, Rnew_val, 0); 2734 asm_assert_ne("null oop not allowed (G1)", 0x322); // Checked by caller on PPC64, so following branch is obsolete: 2735 //beq(CCR0, filtered); 2736 #endif 2737 2738 // Storing region crossing non-NULL, is card already dirty? 2739 assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code"); 2740 const Register Rcard_addr = Rtmp1; 2741 Register Rbase = Rtmp2; 2742 load_const_optimized(Rbase, (address)bs->byte_map_base, /*temp*/ Rtmp3); 2743 2744 srdi(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); 2745 2746 // Get the address of the card. 2747 lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr); 2748 cmpwi(CCR0, Rtmp3, (int)G1SATBCardTableModRefBS::g1_young_card_val()); 2749 beq(CCR0, filtered); 2750 2751 membar(Assembler::StoreLoad); 2752 lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr); // Reload after membar. 2753 cmpwi(CCR0, Rtmp3 /* card value */, CardTableModRefBS::dirty_card_val()); 2754 beq(CCR0, filtered); 2755 2756 // Storing a region crossing, non-NULL oop, card is clean. 2757 // Dirty card and log. 2758 li(Rtmp3, CardTableModRefBS::dirty_card_val()); 2759 //release(); // G1: oops are allowed to get visible after dirty marking. 2760 stbx(Rtmp3, Rbase, Rcard_addr); 2761 2762 add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued. 2763 Rbase = noreg; // end of lifetime 2764 2765 const Register Rqueue_index = Rtmp2, 2766 Rqueue_buf = Rtmp3; 2767 ld(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); 2768 cmpdi(CCR0, Rqueue_index, 0); 2769 beq(CCR0, runtime); // index == 0 then jump to runtime 2770 ld(Rqueue_buf, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread); 2771 2772 addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index 2773 std(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); 2774 2775 stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card 2776 b(filtered); 2777 2778 bind(runtime); 2779 2780 // Save the live input values. 2781 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread); 2782 2783 bind(filtered_int); 2784 } 2785 #endif // INCLUDE_ALL_GCS 2786 2787 // Values for last_Java_pc, and last_Java_sp must comply to the rules 2788 // in frame_ppc.hpp. 2789 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) { 2790 // Always set last_Java_pc and flags first because once last_Java_sp 2791 // is visible has_last_Java_frame is true and users will look at the 2792 // rest of the fields. (Note: flags should always be zero before we 2793 // get here so doesn't need to be set.) 
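  // In effect (illustrative sketch of the stores below):
  //   thread->_last_Java_pc = pc;  // store pc first (skipped if pc == noreg)
  //   thread->_last_Java_sp = sp;  // store sp last: this publishes the frame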

  // Verify that last_Java_pc was zeroed on return to Java.
  asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread,
                          "last_Java_pc not zeroed before leaving Java", 0x200);

  // When returning from calling out from Java mode the frame anchor's
  // last_Java_pc will always be set to NULL. It is set here so that
  // if we are doing a call to native (not VM) we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.
  if (last_Java_pc != noreg)
    std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);

  // Set last_Java_sp last.
  std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
}

void MacroAssembler::reset_last_Java_frame(void) {
  asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
                             R16_thread, "SP was not set, still zero", 0x202);

  BLOCK_COMMENT("reset_last_Java_frame {");
  li(R0, 0);

  // _last_Java_sp = 0
  std(R0, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);

  // _last_Java_pc = 0
  std(R0, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
  BLOCK_COMMENT("} reset_last_Java_frame");
}

void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) {
  assert_different_registers(sp, tmp1);

  // sp points to a TOP_IJAVA_FRAME, retrieve frame's PC via
  // TOP_IJAVA_FRAME_ABI.
  // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
#ifdef CC_INTERP
  ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
#else
  address entry = pc();
  load_const_optimized(tmp1, entry);
#endif

  set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
}

void MacroAssembler::get_vm_result(Register oop_result) {
  // Read:
  //   R16_thread
  //   R16_thread->in_bytes(JavaThread::vm_result_offset())
  //
  // Updated:
  //   oop_result
  //   R16_thread->in_bytes(JavaThread::vm_result_offset())

  verify_thread();

  ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
  li(R0, 0);
  std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);

  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_2(Register metadata_result) {
  // Read:
  //   R16_thread
  //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())
  //
  // Updated:
  //   metadata_result
  //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())

  ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
  li(R0, 0);
  std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
}

Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
  if (Universe::narrow_klass_base() != 0) {
    // Use dst as temp if it is free.
2878 sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0); 2879 current = dst; 2880 } 2881 if (Universe::narrow_klass_shift() != 0) { 2882 srdi(dst, current, Universe::narrow_klass_shift()); 2883 current = dst; 2884 } 2885 return current; 2886 } 2887 2888 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) { 2889 if (UseCompressedClassPointers) { 2890 Register compressedKlass = encode_klass_not_null(ck, klass); 2891 stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop); 2892 } else { 2893 std(klass, oopDesc::klass_offset_in_bytes(), dst_oop); 2894 } 2895 } 2896 2897 void MacroAssembler::store_klass_gap(Register dst_oop, Register val) { 2898 if (UseCompressedClassPointers) { 2899 if (val == noreg) { 2900 val = R0; 2901 li(val, 0); 2902 } 2903 stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed 2904 } 2905 } 2906 2907 int MacroAssembler::instr_size_for_decode_klass_not_null() { 2908 if (!UseCompressedClassPointers) return 0; 2909 int num_instrs = 1; // shift or move 2910 if (Universe::narrow_klass_base() != 0) num_instrs = 7; // shift + load const + add 2911 return num_instrs * BytesPerInstWord; 2912 } 2913 2914 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 2915 assert(dst != R0, "Dst reg may not be R0, as R0 is used here."); 2916 if (src == noreg) src = dst; 2917 Register shifted_src = src; 2918 if (Universe::narrow_klass_shift() != 0 || 2919 Universe::narrow_klass_base() == 0 && src != dst) { // Move required. 2920 shifted_src = dst; 2921 sldi(shifted_src, src, Universe::narrow_klass_shift()); 2922 } 2923 if (Universe::narrow_klass_base() != 0) { 2924 add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0); 2925 } 2926 } 2927 2928 void MacroAssembler::load_klass(Register dst, Register src) { 2929 if (UseCompressedClassPointers) { 2930 lwz(dst, oopDesc::klass_offset_in_bytes(), src); 2931 // Attention: no null check here! 2932 decode_klass_not_null(dst, dst); 2933 } else { 2934 ld(dst, oopDesc::klass_offset_in_bytes(), src); 2935 } 2936 } 2937 2938 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) { 2939 if (!os::zero_page_read_protected()) { 2940 if (TrapBasedNullChecks) { 2941 trap_null_check(src); 2942 } 2943 } 2944 load_klass(dst, src); 2945 } 2946 2947 void MacroAssembler::reinit_heapbase(Register d, Register tmp) { 2948 if (Universe::heap() != NULL) { 2949 load_const_optimized(R30, Universe::narrow_ptrs_base(), tmp); 2950 } else { 2951 // Heap not yet allocated. Load indirectly. 2952 int simm16_offset = load_const_optimized(R30, Universe::narrow_ptrs_base_addr(), tmp, true); 2953 ld(R30, simm16_offset, R30); 2954 } 2955 } 2956 2957 // Clear Array 2958 // Kills both input registers. tmp == R0 is allowed. 2959 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) { 2960 // Procedure for large arrays (uses data cache block zero instruction). 2961 Label startloop, fast, fastloop, small_rest, restloop, done; 2962 const int cl_size = VM_Version::get_cache_line_size(), 2963 cl_dwords = cl_size>>3, 2964 cl_dw_addr_bits = exact_log2(cl_dwords), 2965 dcbz_min = 1; // Min count of dcbz executions, needs to be >0. 2966 2967 //2: 2968 cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included). 2969 blt(CCR1, small_rest); // Too small. 2970 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line. 
  beq(CCR0, fast);                                  // Already 128byte aligned.

  subfic(tmp, tmp, cl_dwords);
  mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
  subf(cnt_dwords, tmp, cnt_dwords); // rest.
  li(tmp, 0);
  //10:
  bind(startloop);                   // Clear at the beginning to reach 128byte boundary.
  std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
  addi(base_ptr, base_ptr, 8);
  bdnz(startloop);
  //13:
  bind(fast);                                // Clear 128byte blocks.
  srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
  andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
  mtctr(tmp);                                // Load counter.
  //16:
  bind(fastloop);
  dcbz(base_ptr);                    // Clear 128byte aligned block.
  addi(base_ptr, base_ptr, cl_size);
  bdnz(fastloop);
  if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
  //20:
  bind(small_rest);
  cmpdi(CCR0, cnt_dwords, 0);        // size 0?
  beq(CCR0, done);                   // rest == 0
  li(tmp, 0);
  mtctr(cnt_dwords);                 // Load counter.
  //24:
  bind(restloop);                    // Clear rest.
  std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
  addi(base_ptr, base_ptr, 8);
  bdnz(restloop);
  //27:
  bind(done);
}

/////////////////////////////////////////// String intrinsics ////////////////////////////////////////////

// Search for a single jchar in a jchar[].
//
// Assumes that result differs from all other registers.
//
// Haystack, needle are the addresses of jchar-arrays.
// NeedleChar is needle[0] if it is known at compile time.
// Haycnt is the length of the haystack. We assume haycnt >= 1.
//
// Preserves haystack, haycnt, kills all other registers.
//
// If needle == R0, we search for the constant needleChar.
void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
                                      Register needle, jchar needleChar,
                                      Register tmp1, Register tmp2) {

  assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);

  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
  Register needle0 = needle, // Contains needle[0].
           addr = tmp1,
           ch1 = tmp2,
           ch2 = R0;

  //2 (variable) or 3 (const):
  if (needle != R0) lhz(needle0, 0, needle); // Preload needle character, needle has len==1.
  dcbtct(haystack, 0x00);                    // Indicate R/O access to haystack.

  srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
  mr(addr, haystack);
  beq(CCR0, L_FinalCheck);
  mtctr(tmp2);              // Move to count register.
  //8:
  bind(L_InnerLoop);        // Main workhorse (2x unrolled search loop).
  lhz(ch1, 0, addr);        // Load characters from haystack.
  lhz(ch2, 2, addr);
  (needle != R0) ? cmpw(CCR0, ch1, needle0) : cmplwi(CCR0, ch1, needleChar);
  (needle != R0) ? cmpw(CCR1, ch2, needle0) : cmplwi(CCR1, ch2, needleChar);
  beq(CCR0, L_Found1);      // Did we find the needle?
  beq(CCR1, L_Found2);
  addi(addr, addr, 4);
  bdnz(L_InnerLoop);
  //16:
  bind(L_FinalCheck);
  andi_(R0, haycnt, 1);
  beq(CCR0, L_NotFound);
  lhz(ch1, 0, addr);        // One position left at which we have to compare.
  (needle != R0) ? cmpw(CCR1, ch1, needle0) : cmplwi(CCR1, ch1, needleChar);
  beq(CCR1, L_Found3);
  //21:
  bind(L_NotFound);
  li(result, -1);           // Not found.
  b(L_End);

  bind(L_Found2);
  addi(addr, addr, 2);
  //24:
  bind(L_Found1);
  bind(L_Found3);             // Return index ...
  subf(addr, haystack, addr); // relative to haystack,
  srdi(result, addr, 1);      // in characters.
  bind(L_End);
}


// Implementation of IndexOf for jchar arrays.
//
// The lengths of haystack and needle are not constant, i.e. passed in registers.
//
// Preserves registers haystack, needle.
// Kills registers haycnt, needlecnt.
// Assumes that result differs from all other registers.
// Haystack, needle are the addresses of jchar-arrays.
// Haycnt, needlecnt are the lengths of them, respectively.
//
// Needlecntval must be zero or a 15-bit unsigned immediate and > 1.
void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                    Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
                                    Register tmp1, Register tmp2, Register tmp3, Register tmp4) {

  // Ensure 0 < needlecnt <= haycnt in ideal graph as prerequisite!
  Label L_TooShort, L_Found, L_NotFound, L_End;
  Register last_addr = haycnt, // Kill haycnt at the beginning.
           addr = tmp1,
           n_start = tmp2,
           ch1 = tmp3,
           ch2 = R0;

  // **************************************************************************************************
  // Prepare for main loop: optimized for needle count >= 2, bail out otherwise.
  // **************************************************************************************************

  //1 (variable) or 3 (const):
  dcbtct(needle, 0x00);   // Indicate R/O access to needle.
  dcbtct(haystack, 0x00); // Indicate R/O access to haystack.

  // Compute last haystack addr to use if no match gets found.
  if (needlecntval == 0) { // variable needlecnt
    //3:
    subf(ch1, needlecnt, haycnt);   // Last character index to compare is haycnt-needlecnt.
    addi(addr, haystack, -2);       // Accesses use pre-increment.
    cmpwi(CCR6, needlecnt, 2);
    blt(CCR6, L_TooShort);          // Variable needlecnt: handle short needle separately.
    slwi(ch1, ch1, 1);              // Scale to number of bytes.
    lwz(n_start, 0, needle);        // Load first 2 characters of needle.
    add(last_addr, haystack, ch1);  // Point to last address to compare (haystack + 2*(haycnt-needlecnt)).
    addi(needlecnt, needlecnt, -2); // Rest of needle.
  } else { // constant needlecnt
    guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
    assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
    //5:
    addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
    lwz(n_start, 0, needle);          // Load first 2 characters of needle.
    addi(addr, haystack, -2);         // Accesses use pre-increment.
    slwi(ch1, ch1, 1);                // Scale to number of bytes.
    add(last_addr, haystack, ch1);    // Point to last address to compare (haystack + 2*(haycnt-needlecnt)).
    li(needlecnt, needlecntval - 2);  // Rest of needle.
  }

  // Main Loop (now we have at least 3 characters).
  //11:
  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3;
  bind(L_OuterLoop); // Search for 1st 2 characters.
  Register addr_diff = tmp4;
  subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
  addi(addr, addr, 2);              // This is the new address we want to use for comparing.
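  // addr_diff is a byte distance; with 2-byte jchars and two characters
  // handled per unrolled iteration, addr_diff >> 2 below yields the inner
  // loop trip count (srdi_ also sets CCR0 for the zero check).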
  srdi_(ch2, addr_diff, 2);
  beq(CCR0, L_FinalCheck);   // 2 characters left?
  mtctr(ch2);                // addr_diff/4
  //16:
  bind(L_InnerLoop);         // Main workhorse (2x unrolled search loop)
  lwz(ch1, 0, addr);         // Load 2 characters of haystack (ignore alignment).
  lwz(ch2, 2, addr);
  cmpw(CCR0, ch1, n_start);  // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
  cmpw(CCR1, ch2, n_start);
  beq(CCR0, L_Comp1);        // Did we find the needle start?
  beq(CCR1, L_Comp2);
  addi(addr, addr, 4);
  bdnz(L_InnerLoop);
  //24:
  bind(L_FinalCheck);
  rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff >> 1) & 1.
  beq(CCR0, L_NotFound);
  lwz(ch1, 0, addr);         // One position left at which we have to compare.
  cmpw(CCR1, ch1, n_start);
  beq(CCR1, L_Comp3);
  //29:
  bind(L_NotFound);
  li(result, -1);            // not found
  b(L_End);


  // **************************************************************************************************
  // Special Case: unfortunately, the variable needle case can be called with needlecnt < 2
  // **************************************************************************************************
  //31:
  if ((needlecntval >> 1) != 1) { // Const needlecnt is 2 or 3? Reduce code size.
    int nopcnt = 5;
    if (needlecntval != 0) ++nopcnt; // Balance alignment (other case: see below).
    if (needlecntval == 0) {         // We have to handle these cases separately.
      Label L_OneCharLoop;
      bind(L_TooShort);
      mtctr(haycnt);
      lhz(n_start, 0, needle);       // First character of needle
      bind(L_OneCharLoop);
      lhzu(ch1, 2, addr);
      cmpw(CCR1, ch1, n_start);
      beq(CCR1, L_Found);            // Did we find the one character needle?
      bdnz(L_OneCharLoop);
      li(result, -1);                // Not found.
      b(L_End);
    } // 8 instructions, so no impact on alignment.
    for (int x = 0; x < nopcnt; ++x) nop();
  }

  // **************************************************************************************************
  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
  // **************************************************************************************************

  // Compare the rest
  //36 if needlecntval==0, else 37:
  bind(L_Comp2);
  addi(addr, addr, 2); // First comparison has failed, 2nd one hit.
  bind(L_Comp1);       // Addr points to possible needle start.
  bind(L_Comp3);       // Could have created a copy and used a different return address, but we save code size here.
  if (needlecntval != 2) { // Const needlecnt == 2?
    if (needlecntval != 3) {
      if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt == 2?
      Register ind_reg = tmp4;
      li(ind_reg, 2*2); // First 2 characters are already compared, use index 2.
      mtctr(needlecnt); // Decremented by 2, still > 0.
      //40:
      Label L_CompLoop;
      bind(L_CompLoop);
      lhzx(ch2, needle, ind_reg);
      lhzx(ch1, addr, ind_reg);
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
      addi(ind_reg, ind_reg, 2);
      bdnz(L_CompLoop);
    } else { // No loop required if there's only one needle character left.
      lhz(ch2, 2*2, needle);
      lhz(ch1, 2*2, addr);
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
    }
  }
  // Return index ...
  //46:
  bind(L_Found);
  subf(addr, haystack, addr); // relative to haystack, ...
  srdi(result, addr, 1);      // in characters.
  //48:
  bind(L_End);
}

// Implementation of Compare for jchar arrays.
//
// Kills the registers str1, str2, cnt1, cnt2.
// Kills cr0, ctr.
// Assumes that result differs from the input registers.
void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
                                    Register result_reg, Register tmp_reg) {
  assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);

  Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
  Register cnt_diff = R0,
           limit_reg = cnt1_reg,
           chr1_reg = result_reg,
           chr2_reg = cnt2_reg,
           addr_diff = str2_reg;

  // Offset 0 should be 32 byte aligned.
  //-4:
  dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
  dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
  //-2:
  // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
  subf(result_reg, cnt2_reg, cnt1_reg);   // difference between cnt1/2
  subf_(addr_diff, str1_reg, str2_reg);   // alias?
  beq(CCR0, Ldone);                       // return cnt difference if both addresses are identical
  srawi(limit_reg, result_reg, 31);       // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
  mr(cnt_diff, result_reg);
  andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1 < cnt2 ? cnt1-cnt2 : 0
  add_(limit_reg, cnt2_reg, limit_reg);   // min(cnt1, cnt2) == 0?
  beq(CCR0, Ldone);                       // return cnt difference if one has 0 length

  lhz(chr1_reg, 0, str1_reg);             // optional: early out if first characters mismatch
  lhzx(chr2_reg, str1_reg, addr_diff);    // optional: early out if first characters mismatch
  addi(tmp_reg, limit_reg, -1);           // min(cnt1, cnt2)-1
  subf_(result_reg, chr2_reg, chr1_reg);  // optional: early out if first characters mismatch
  bne(CCR0, Ldone);                       // optional: early out if first characters mismatch

  // Set loop counter by scaling down tmp_reg
  srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4
  ble(CCR0, Lslow_case);                    // need > 4 characters for fast loop
  andi(limit_reg, tmp_reg, 4-1);            // remaining characters

  // Adapt str1_reg/str2_reg for the first loop iteration
  mtctr(chr2_reg);                 // (min(cnt1, cnt2)-1)/4
  addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop
  //16:
  // Compare the rest of the characters
  bind(Lfast_loop);
  ld(chr1_reg, 0, str1_reg);
  ldx(chr2_reg, str1_reg, addr_diff);
  cmpd(CCR0, chr2_reg, chr1_reg);
  bne(CCR0, Lslow_case);          // return chr1_reg
  addi(str1_reg, str1_reg, 4*2);
  bdnz(Lfast_loop);
  addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing
  //23:
  bind(Lslow_case);
  mtctr(limit_reg);
  //24:
  bind(Lslow_loop);
  lhz(chr1_reg, 0, str1_reg);
  lhzx(chr2_reg, str1_reg, addr_diff);
  subf_(result_reg, chr2_reg, chr1_reg);
  bne(CCR0, Ldone);               // return chr1_reg
  addi(str1_reg, str1_reg, 1*2);
  bdnz(Lslow_loop);
  //30:
  // If strings are equal up to min length, return the length difference.
  mr(result_reg, cnt_diff);
  nop(); // alignment
  //32:
  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
}


// Compare char[] arrays.
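//
// For reference, a C-like sketch of the semantics implemented below
// (illustrative only, not the emitted instruction sequence):
//   bool equal = true;
//   for (int i = 0; i < cnt && equal; i++) { equal = (str1[i] == str2[i]); }
//   result = equal ? 1 : 0;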
//
// str1_reg   USE only
// str2_reg   USE only
// cnt_reg    USE_DEF, due to tmp reg shortage
// result_reg DEF only, might compromise USE only registers
void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
                                        Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
                                        Register tmp5_reg) {

  // Str1 may be the same register as str2, which can occur e.g. after scalar replacement.
  assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
  assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);

  // Offset 0 should be 32 byte aligned.
  Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
  Register index_reg = tmp5_reg;
  Register cbc_iter  = tmp4_reg;

  //-1:
  dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
  dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
  //1:
  andi(cbc_iter, cnt_reg, 4-1);            // Remaining iterations after 4 java characters per iteration loop.
  li(index_reg, 0);                        // init
  li(result_reg, 0);                       // assume false
  srwi_(tmp2_reg, cnt_reg, exact_log2(4)); // Div: 4 java characters per iteration (main loop).

  cmpwi(CCR1, cbc_iter, 0); // CCR1 = (cbc_iter == 0)
  beq(CCR0, Linit_cbc);     // too short
  mtctr(tmp2_reg);
  //8:
  bind(Lloop);
  ldx(tmp1_reg, str1_reg, index_reg);
  ldx(tmp2_reg, str2_reg, index_reg);
  cmpd(CCR0, tmp1_reg, tmp2_reg);
  bne(CCR0, Ldone_false); // Unequal char pair found -> done.
  addi(index_reg, index_reg, 4*sizeof(jchar));
  bdnz(Lloop);
  //14:
  bind(Linit_cbc);
  beq(CCR1, Ldone_true);
  mtctr(cbc_iter);
  //16:
  bind(Lcbc);
  lhzx(tmp1_reg, str1_reg, index_reg);
  lhzx(tmp2_reg, str2_reg, index_reg);
  cmpw(CCR0, tmp1_reg, tmp2_reg);
  bne(CCR0, Ldone_false); // Unequal char pair found -> done.
  addi(index_reg, index_reg, 1*sizeof(jchar));
  bdnz(Lcbc);
  nop();
  bind(Ldone_true);
  li(result_reg, 1);
  //24:
  bind(Ldone_false);
}


void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
                                           Register tmp1_reg, Register tmp2_reg) {
  // Str1 may be the same register as str2, which can occur e.g. after scalar replacement.
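  // With a compile-time constant count, short comparisons (cntval < 16) are
  // fully unrolled below into at most a few 8-, 4- and 2-byte compares, while
  // longer ones fall back to a ctr-based loop over 8-byte chunks.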
  assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
  assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
  assert(sizeof(jchar) == 2, "must be");
  assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");

  Label Ldone_false;

  if (cntval < 16) { // short case
    if (cntval != 0) li(result_reg, 0); // assume false

    const int num_bytes = cntval * sizeof(jchar);
    int index = 0;
    for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
      ld(tmp1_reg, index, str1_reg);
      ld(tmp2_reg, index, str2_reg);
      cmpd(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    if (cntval & 2) {
      lwz(tmp1_reg, index, str1_reg);
      lwz(tmp2_reg, index, str2_reg);
      cmpw(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
      index += 4;
    }
    if (cntval & 1) {
      lhz(tmp1_reg, index, str1_reg);
      lhz(tmp2_reg, index, str2_reg);
      cmpw(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    // fallthrough: true
  } else {
    Label Lloop;
    Register index_reg = tmp1_reg;
    const int loopcnt = cntval / 4;
    assert(loopcnt > 0, "must be");
    // Offset 0 should be 32 byte aligned.
    //2:
    dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
    dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
    li(tmp2_reg, loopcnt);
    li(index_reg, 0);       // init
    li(result_reg, 0);      // assume false
    mtctr(tmp2_reg);
    //8:
    bind(Lloop);
    ldx(R0, str1_reg, index_reg);
    ldx(tmp2_reg, str2_reg, index_reg);
    cmpd(CCR0, R0, tmp2_reg);
    bne(CCR0, Ldone_false); // Unequal char pair found -> done.
    addi(index_reg, index_reg, 4*sizeof(jchar));
    bdnz(Lloop);
    //14:
    if (cntval & 2) {
      lwzx(R0, str1_reg, index_reg);
      lwzx(tmp2_reg, str2_reg, index_reg);
      cmpw(CCR0, R0, tmp2_reg);
      bne(CCR0, Ldone_false);
      if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
    }
    if (cntval & 1) {
      lhzx(R0, str1_reg, index_reg);
      lhzx(tmp2_reg, str2_reg, index_reg);
      cmpw(CCR0, R0, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    // fallthrough: true
  }
  li(result_reg, 1);
  bind(Ldone_false);
}

// dest_lo += src1 + src2
// dest_hi += carry1 + carry2
void MacroAssembler::add2_with_carry(Register dest_hi,
                                     Register dest_lo,
                                     Register src1, Register src2) {
  li(R0, 0);
  addc(dest_lo, dest_lo, src1);
  adde(dest_hi, dest_hi, R0);
  addc(dest_lo, dest_lo, src2);
  adde(dest_hi, dest_hi, R0);
}

// Multiply 64 bit by 64 bit first loop.
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
                                           Register x_xstart,
                                           Register y, Register y_idx,
                                           Register z,
                                           Register carry,
                                           Register product_high, Register product,
                                           Register idx, Register kdx,
                                           Register tmp) {
  // jlong carry, x[], y[], z[];
  // for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
  //   huge_128 product = y[idx] * x[xstart] + carry;
  //   z[kdx] = (jlong)product;
  //   carry  = (jlong)(product >>> 64);
  // }
  // z[xstart] = carry;

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  addic_(xstart, xstart, -1);
  blt(CCR0, L_one_x);   // Special case: length of x is 1.

  // Load next two integers of x.
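  // x is an int[]; two adjacent 32-bit digits are fetched as one 64-bit word.
  // On little-endian the rldicl below rotates the loaded word by 32 bits to
  // swap its halves, so the digit with the lower index lands in the more
  // significant half, matching the big-endian layout the arithmetic expects.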
  sldi(tmp, xstart, LogBytesPerInt);
  ldx(x_xstart, x, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(x_xstart, x_xstart, 32, 0);
#endif

  align(32, 16);
  bind(L_first_loop);

  cmpdi(CCR0, idx, 1);
  blt(CCR0, L_first_loop_exit);
  addi(idx, idx, -2);
  beq(CCR0, L_one_y);

  // Load next two integers of y.
  sldi(tmp, idx, LogBytesPerInt);
  ldx(y_idx, y, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(y_idx, y_idx, 32, 0);
#endif

  bind(L_multiply);
  multiply64(product_high, product, x_xstart, y_idx);

  li(tmp, 0);
  addc(product, product, carry);         // Add carry to result.
  adde(product_high, product_high, tmp); // Add carry of the last addition.
  addi(kdx, kdx, -2);

  // Store result.
#ifdef VM_LITTLE_ENDIAN
  rldicl(product, product, 32, 0);
#endif
  sldi(tmp, kdx, LogBytesPerInt);
  stdx(product, z, tmp);
  mr_if_needed(carry, product_high);
  b(L_first_loop);

  bind(L_one_y); // Load one 32 bit portion of y as (0,value).
  lwz(y_idx, 0, y);
  b(L_multiply);

  bind(L_one_x); // Load one 32 bit portion of x as (0,value).
  lwz(x_xstart, 0, x);
  b(L_first_loop);

  bind(L_first_loop_exit);
}

// Multiply 64 bit by 64 bit and add 128 bit.
void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
                                            Register z, Register yz_idx,
                                            Register idx, Register carry,
                                            Register product_high, Register product,
                                            Register tmp, int offset) {

  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
  // z[kdx] = (jlong)product;

  sldi(tmp, idx, LogBytesPerInt);
  if (offset) {
    addi(tmp, tmp, offset);
  }
  ldx(yz_idx, y, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(yz_idx, yz_idx, 32, 0);
#endif

  multiply64(product_high, product, x_xstart, yz_idx);
  ldx(yz_idx, z, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(yz_idx, yz_idx, 32, 0);
#endif

  add2_with_carry(product_high, product, carry, yz_idx);

  sldi(tmp, idx, LogBytesPerInt);
  if (offset) {
    addi(tmp, tmp, offset);
  }
#ifdef VM_LITTLE_ENDIAN
  rldicl(product, product, 32, 0);
#endif
  stdx(product, z, tmp);
}

// Multiply 128 bit by 128 bit. Unrolled inner loop.
void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
                                             Register y, Register z,
                                             Register yz_idx, Register idx, Register carry,
                                             Register product_high, Register product,
                                             Register carry2, Register tmp) {

  // jlong carry, x[], y[], z[];
  // int kdx = ystart + 1;
  // for (int idx = ystart - 2; idx >= 0; idx -= 2) { // Third loop
  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
  //   z[kdx+idx+1] = (jlong)product;
  //   jlong carry2 = (jlong)(product >>> 64);
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // idx += 2;
  // if (idx > 0) {
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
  const Register jdx = R0;

  // Scale the index.
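  // Each unrolled iteration consumes four 32-bit digits (two 64-bit words),
  // so jdx = idx / 4 is the loop trip count; srdi_ also sets CCR0 so the beq
  // below can skip the loop when fewer than four digits remain.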
  srdi_(jdx, idx, 2);
  beq(CCR0, L_third_loop_exit);
  mtctr(jdx);

  align(32, 16);
  bind(L_third_loop);

  addi(idx, idx, -4);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product_high, product, tmp, 8);
  mr_if_needed(carry2, product_high);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product_high, product, tmp, 0);
  mr_if_needed(carry, product_high);
  bdnz(L_third_loop);

  bind(L_third_loop_exit); // Handle any left-over operand parts.

  andi_(idx, idx, 0x3);
  beq(CCR0, L_post_third_loop_done);

  Label L_check_1;

  addic_(idx, idx, -2);
  blt(CCR0, L_check_1);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product_high, product, tmp, 0);
  mr_if_needed(carry, product_high);

  bind(L_check_1);

  addi(idx, idx, 0x2);
  andi_(idx, idx, 0x1);
  addic_(idx, idx, -1);
  blt(CCR0, L_post_third_loop_done);

  sldi(tmp, idx, LogBytesPerInt);
  lwzx(yz_idx, y, tmp);
  multiply64(product_high, product, x_xstart, yz_idx);
  lwzx(yz_idx, z, tmp);

  add2_with_carry(product_high, product, yz_idx, carry);

  sldi(tmp, idx, LogBytesPerInt);
  stwx(product, z, tmp);
  srdi(product, product, 32);

  sldi(product_high, product_high, 32);
  orr(product, product, product_high);
  mr_if_needed(carry, product);

  bind(L_post_third_loop_done);
} // multiply_128_x_128_loop

void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z, Register zlen,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
                                     Register tmp5, Register tmp6,
                                     Register tmp7, Register tmp8,
                                     Register tmp9, Register tmp10,
                                     Register tmp11, Register tmp12,
                                     Register tmp13) {

  ShortBranchVerifier sbv(this);

  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
  assert_different_registers(x, xlen, y, ylen, z, zlen,
                             tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);

  const Register idx = tmp1;
  const Register kdx = tmp2;
  const Register xstart = tmp3;

  const Register y_idx = tmp4;
  const Register carry = tmp5;
  const Register product = tmp6;
  const Register product_high = tmp7;
  const Register x_xstart = tmp8;
  const Register tmp = tmp9;

  // First Loop.
  //
  // final static long LONG_MASK = 0xffffffffL;
  // int xstart = xlen - 1;
  // int ystart = ylen - 1;
  // long carry = 0;
  // for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
  //   long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //   z[kdx] = (int)product;
  //   carry = product >>> 32;
  // }
  // z[xstart] = (int)carry;

  mr_if_needed(idx, ylen); // idx = ylen
  mr_if_needed(kdx, zlen); // kdx = xlen + ylen
  li(carry, 0);            // carry = 0

  Label L_done;

  addic_(xstart, xlen, -1);
  blt(CCR0, L_done);

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z,
                        carry, product_high, product, idx, kdx, tmp);

  Label L_second_loop;

  cmpdi(CCR0, kdx, 0);
  beq(CCR0, L_second_loop);

  Label L_carry;

  addic_(kdx, kdx, -1);
  beq(CCR0, L_carry);

  // Store lower 32 bits of carry.
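  // z[kdx] = (int)carry; the srdi below then exposes the upper 32 bits so
  // they can be stored at z[kdx-1] under the L_carry label.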
  sldi(tmp, kdx, LogBytesPerInt);
  stwx(carry, z, tmp);
  srdi(carry, carry, 32);
  addi(kdx, kdx, -1);

  bind(L_carry);

  // Store upper 32 bits of carry.
  sldi(tmp, kdx, LogBytesPerInt);
  stwx(carry, z, tmp);

  // Second and third (nested) loops.
  //
  // for (int i = xstart - 1; i >= 0; i--) { // Second loop
  //   carry = 0;
  //   for (int jdx = ystart, k = ystart + 1 + i; jdx >= 0; jdx--, k--) { // Third loop
  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                    (z[k] & LONG_MASK) + carry;
  //     z[k] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[i] = (int)carry;
  // }
  //
  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx

  bind(L_second_loop);

  li(carry, 0); // carry = 0;

  addic_(xstart, xstart, -1); // i = xstart - 1;
  blt(CCR0, L_done);

  Register zsave = tmp10;

  mr(zsave, z);

  Label L_last_x;

  sldi(tmp, xstart, LogBytesPerInt);
  add(z, z, tmp); // z = z + k - j
  addi(z, z, 4);
  addic_(xstart, xstart, -1); // i = xstart - 1;
  blt(CCR0, L_last_x);

  sldi(tmp, xstart, LogBytesPerInt);
  ldx(x_xstart, x, tmp);
#ifdef VM_LITTLE_ENDIAN
  rldicl(x_xstart, x_xstart, 32, 0);
#endif

  Label L_third_loop_prologue;

  bind(L_third_loop_prologue);

  Register xsave = tmp11;
  Register xlensave = tmp12;
  Register ylensave = tmp13;

  mr(xsave, x);
  mr(xlensave, xstart);
  mr(ylensave, ylen);

  multiply_128_x_128_loop(x_xstart, y, z, y_idx, ylen,
                          carry, product_high, product, x, tmp);

  mr(z, zsave);
  mr(x, xsave);
  mr(xlen, xlensave); // This is the decrement of the loop counter!
  mr(ylen, ylensave);

  addi(tmp3, xlen, 1);
  sldi(tmp, tmp3, LogBytesPerInt);
  stwx(carry, z, tmp);
  addic_(tmp3, tmp3, -1);
  blt(CCR0, L_done);

  srdi(carry, carry, 32);
  sldi(tmp, tmp3, LogBytesPerInt);
  stwx(carry, z, tmp);
  b(L_second_loop);

  // Next infrequent code is moved outside loops.
  bind(L_last_x);

  lwz(x_xstart, 0, x);
  b(L_third_loop_prologue);

  bind(L_done);
} // multiply_to_len

void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
#ifdef ASSERT
  Label ok;
  if (check_equal) {
    beq(CCR0, ok);
  } else {
    bne(CCR0, ok);
  }
  stop(msg, id);
  bind(ok);
#endif
}

void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset,
                                          Register mem_base, const char* msg, int id) {
#ifdef ASSERT
  switch (size) {
    case 4:
      lwz(R0, mem_offset, mem_base);
      cmpwi(CCR0, R0, 0);
      break;
    case 8:
      ld(R0, mem_offset, mem_base);
      cmpdi(CCR0, R0, 0);
      break;
    default:
      ShouldNotReachHere();
  }
  asm_assert(check_equal, msg, id);
#endif // ASSERT
}

void MacroAssembler::verify_thread() {
  if (VerifyThread) {
    unimplemented("'VerifyThread' currently not implemented on PPC");
  }
}

// READ: oop. KILL: R0. Volatile floats perhaps.
void MacroAssembler::verify_oop(Register oop, const char* msg) {
  if (!VerifyOops) {
    return;
  }

  address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
  const Register tmp = R11; // Will be preserved.
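  // The verify stub reads its arguments from R3_ARG1/R4_ARG2 and must not
  // disturb the caller's state, so the volatile GPRs (except R0, which is
  // killed anyway, see above) are saved below SP before the call and
  // restored afterwards.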
  const int nbytes_save = 11*8; // Volatile gprs except R0.
  save_volatile_gprs(R1_SP, -nbytes_save); // except R0

  if (oop == tmp) mr(R4_ARG2, oop);
  save_LR_CR(tmp); // save in old frame
  push_frame_reg_args(nbytes_save, tmp);
  // load FunctionDescriptor** / entry_address *
  load_const_optimized(tmp, fd, R0);
  // load FunctionDescriptor* / entry_address
  ld(tmp, 0, tmp);
  if (oop != tmp) mr_if_needed(R4_ARG2, oop);
  load_const_optimized(R3_ARG1, (address)msg, R0);
  // Call destination for its side effect.
  call_c(tmp);

  pop_frame();
  restore_LR_CR(tmp);
  restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
}

const char* stop_types[] = {
  "stop",
  "untested",
  "unimplemented",
  "shouldnotreachhere"
};

static void stop_on_request(int tp, const char* msg) {
  tty->print("PPC assembly code requires stop: (%s) %s\n", stop_types[tp%/*stop_end*/4], msg);
  guarantee(false, err_msg("PPC assembly code requires stop: %s", msg));
}

// Call a C-function that prints output.
void MacroAssembler::stop(int type, const char* msg, int id) {
#ifndef PRODUCT
  block_comment(err_msg("stop: %s %s {", stop_types[type%stop_end], msg));
#else
  block_comment("stop {");
#endif

  // setup arguments
  load_const_optimized(R3_ARG1, type);
  load_const_optimized(R4_ARG2, (void *)msg, /*tmp=*/R0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), R3_ARG1, R4_ARG2);
  illtrap();
  emit_int32(id);
  block_comment("} stop;");
}

#ifndef PRODUCT
// Write pattern 0x0101010101010101 in memory region [low-before, high+after].
// Val, addr are temp registers.
// If low == addr, addr is killed.
// High is preserved.
void MacroAssembler::zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) {
  if (!ZapMemory) return;

  assert_different_registers(low, val);

  BLOCK_COMMENT("zap memory region {");
  load_const_optimized(val, 0x0101010101010101);
  int size = before + after;
  if (low == high && size < 5 && size > 0) {
    int offset = -before * BytesPerWord;
    for (int i = 0; i < size; ++i) {
      std(val, offset, low);
      offset += (1*BytesPerWord);
    }
  } else {
    addi(addr, low, -before * BytesPerWord);
    assert_different_registers(high, val);
    if (after) addi(high, high, after * BytesPerWord);
    Label loop;
    bind(loop);
    std(val, 0, addr);
    addi(addr, addr, 8);
    cmpd(CCR6, addr, high);
    ble(CCR6, loop);
    if (after) addi(high, high, -after * BytesPerWord); // Correct back to old value.
  }
  BLOCK_COMMENT("} zap memory region");
}

#endif // !PRODUCT

SkipIfEqualZero::SkipIfEqualZero(MacroAssembler* masm, Register temp, const bool* flag_addr) : _masm(masm), _label() {
  int simm16_offset = masm->load_const_optimized(temp, (address)flag_addr, R0, true);
  assert(sizeof(bool) == 1, "PowerPC ABI");
  masm->lbz(temp, simm16_offset, temp);
  masm->cmpwi(CCR0, temp, 0);
  masm->beq(CCR0, _label);
}

SkipIfEqualZero::~SkipIfEqualZero() {
  _masm->bind(_label);
}
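// Typical use of SkipIfEqualZero (illustrative sketch only; 'SomeFlag' is a
// hypothetical bool VM flag):
//
//   {
//     SkipIfEqualZero skip_if(masm, R11_scratch1, &SomeFlag);
//     // ... code emitted here is executed only when SomeFlag is true ...
//   } // The destructor binds the skip-target label here.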