1 /* 2 * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2019, SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef CPU_PPC_MACROASSEMBLER_PPC_HPP 27 #define CPU_PPC_MACROASSEMBLER_PPC_HPP 28 29 #include "asm/assembler.hpp" 30 #include "oops/accessDecorators.hpp" 31 #include "runtime/rtmLocking.hpp" 32 #include "utilities/macros.hpp" 33 34 // MacroAssembler extends Assembler by a few frequently used macros. 35 36 class ciTypeArray; 37 38 class MacroAssembler: public Assembler { 39 public: 40 MacroAssembler(CodeBuffer* code) : Assembler(code) {} 41 42 // 43 // Optimized instruction emitters 44 // 45 46 inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; } 47 inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); } 48 49 // load d = *[a+si31] 50 // Emits several instructions if the offset is not encodable in one instruction. 51 void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop); 52 void ld_largeoffset (Register d, int si31, Register a, int emit_filler_nop); 53 inline static bool is_ld_largeoffset(address a); 54 inline static int get_ld_largeoffset_offset(address a); 55 56 inline void round_to(Register r, int modulus); 57 58 // Load/store with type given by parameter. 59 void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed); 60 void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes); 61 62 // Move register if destination register and target register are different 63 inline void mr_if_needed(Register rd, Register rs); 64 inline void fmr_if_needed(FloatRegister rd, FloatRegister rs); 65 // This is dedicated for emitting scheduled mach nodes. For better 66 // readability of the ad file I put it here. 67 // Endgroups are not needed if 68 // - the scheduler is off 69 // - the scheduler found that there is a natural group end, in that 70 // case it reduced the size of the instruction used in the test 71 // yielding 'needed'. 72 inline void endgroup_if_needed(bool needed); 73 74 // Memory barriers. 75 inline void membar(int bits); 76 inline void release(); 77 inline void acquire(); 78 inline void fence(); 79 80 // nop padding 81 void align(int modulus, int max = 252, int rem = 0); 82 83 // 84 // Constants, loading constants, TOC support 85 // 86 87 // Address of the global TOC. 88 inline static address global_toc(); 89 // Offset of given address to the global TOC. 90 inline static int offset_to_global_toc(const address addr); 91 92 // Address of TOC of the current method. 93 inline address method_toc(); 94 // Offset of given address to TOC of the current method. 95 inline int offset_to_method_toc(const address addr); 96 97 // Global TOC. 98 void calculate_address_from_global_toc(Register dst, address addr, 99 bool hi16 = true, bool lo16 = true, 100 bool add_relocation = true, bool emit_dummy_addr = false); 101 inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) { 102 calculate_address_from_global_toc(dst, addr, true, false); 103 }; 104 inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) { 105 calculate_address_from_global_toc(dst, addr, false, true); 106 }; 107 108 inline static bool is_calculate_address_from_global_toc_at(address a, address bound); 109 // Returns address of first instruction in sequence. 110 static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr); 111 static address get_address_of_calculate_address_from_global_toc_at(address a, address addr); 112 113 #ifdef _LP64 114 // Patch narrow oop constant. 115 inline static bool is_set_narrow_oop(address a, address bound); 116 // Returns address of first instruction in sequence. 117 static address patch_set_narrow_oop(address a, address bound, narrowOop data); 118 static narrowOop get_narrow_oop(address a, address bound); 119 #endif 120 121 inline static bool is_load_const_at(address a); 122 123 // Emits an oop const to the constant pool, loads the constant, and 124 // sets a relocation info with address current_pc. 125 // Returns true if successful. 126 bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false); 127 128 static bool is_load_const_from_method_toc_at(address a); 129 static int get_offset_of_load_const_from_method_toc_at(address a); 130 131 // Get the 64 bit constant from a `load_const' sequence. 132 static long get_const(address load_const); 133 134 // Patch the 64 bit constant of a `load_const' sequence. This is a 135 // low level procedure. It neither flushes the instruction cache nor 136 // is it atomic. 137 static void patch_const(address load_const, long x); 138 139 // Metadata in code that we have to keep track of. 140 AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index 141 AddressLiteral constant_metadata_address(Metadata* obj); // find_index 142 // Oops used directly in compiled code are stored in the constant pool, 143 // and loaded from there. 144 // Allocate new entry for oop in constant pool. Generate relocation. 145 AddressLiteral allocate_oop_address(jobject obj); 146 // Find oop obj in constant pool. Return relocation with it's index. 147 AddressLiteral constant_oop_address(jobject obj); 148 149 // Find oop in constant pool and emit instructions to load it. 150 // Uses constant_oop_address. 151 inline void set_oop_constant(jobject obj, Register d); 152 // Same as load_address. 153 inline void set_oop (AddressLiteral obj_addr, Register d); 154 155 // Read runtime constant: Issue load if constant not yet established, 156 // else use real constant. 157 virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, 158 Register tmp, 159 int offset); 160 161 // 162 // branch, jump 163 // 164 165 inline void pd_patch_instruction(address branch, address target, const char* file, int line); 166 NOT_PRODUCT(static void pd_print_patched_instruction(address branch);) 167 168 // Conditional far branch for destinations encodable in 24+2 bits. 169 // Same interface as bc, e.g. no inverse boint-field. 170 enum { 171 bc_far_optimize_not = 0, 172 bc_far_optimize_on_relocate = 1 173 }; 174 // optimize: flag for telling the conditional far branch to optimize 175 // itself when relocated. 176 void bc_far(int boint, int biint, Label& dest, int optimize); 177 void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions 178 // Relocation of conditional far branches. 179 static bool is_bc_far_at(address instruction_addr); 180 static address get_dest_of_bc_far_at(address instruction_addr); 181 static void set_dest_of_bc_far_at(address instruction_addr, address dest); 182 private: 183 static bool inline is_bc_far_variant1_at(address instruction_addr); 184 static bool inline is_bc_far_variant2_at(address instruction_addr); 185 static bool inline is_bc_far_variant3_at(address instruction_addr); 186 public: 187 188 // Convenience bc_far versions. 189 inline void blt_far(ConditionRegister crx, Label& L, int optimize); 190 inline void bgt_far(ConditionRegister crx, Label& L, int optimize); 191 inline void beq_far(ConditionRegister crx, Label& L, int optimize); 192 inline void bso_far(ConditionRegister crx, Label& L, int optimize); 193 inline void bge_far(ConditionRegister crx, Label& L, int optimize); 194 inline void ble_far(ConditionRegister crx, Label& L, int optimize); 195 inline void bne_far(ConditionRegister crx, Label& L, int optimize); 196 inline void bns_far(ConditionRegister crx, Label& L, int optimize); 197 198 // Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump. 199 private: 200 enum { 201 bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/), 202 bxx64_patchable_size = bxx64_patchable_instruction_count * BytesPerInstWord, 203 bxx64_patchable_ret_addr_offset = bxx64_patchable_size 204 }; 205 void bxx64_patchable(address target, relocInfo::relocType rt, bool link); 206 static bool is_bxx64_patchable_at( address instruction_addr, bool link); 207 // Does the instruction use a pc-relative encoding of the destination? 208 static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link); 209 static bool is_bxx64_patchable_variant1_at( address instruction_addr, bool link); 210 // Load destination relative to global toc. 211 static bool is_bxx64_patchable_variant1b_at( address instruction_addr, bool link); 212 static bool is_bxx64_patchable_variant2_at( address instruction_addr, bool link); 213 static void set_dest_of_bxx64_patchable_at( address instruction_addr, address target, bool link); 214 static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link); 215 216 public: 217 // call 218 enum { 219 bl64_patchable_instruction_count = bxx64_patchable_instruction_count, 220 bl64_patchable_size = bxx64_patchable_size, 221 bl64_patchable_ret_addr_offset = bxx64_patchable_ret_addr_offset 222 }; 223 inline void bl64_patchable(address target, relocInfo::relocType rt) { 224 bxx64_patchable(target, rt, /*link=*/true); 225 } 226 inline static bool is_bl64_patchable_at(address instruction_addr) { 227 return is_bxx64_patchable_at(instruction_addr, /*link=*/true); 228 } 229 inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) { 230 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true); 231 } 232 inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) { 233 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true); 234 } 235 inline static address get_dest_of_bl64_patchable_at(address instruction_addr) { 236 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true); 237 } 238 // jump 239 enum { 240 b64_patchable_instruction_count = bxx64_patchable_instruction_count, 241 b64_patchable_size = bxx64_patchable_size, 242 }; 243 inline void b64_patchable(address target, relocInfo::relocType rt) { 244 bxx64_patchable(target, rt, /*link=*/false); 245 } 246 inline static bool is_b64_patchable_at(address instruction_addr) { 247 return is_bxx64_patchable_at(instruction_addr, /*link=*/false); 248 } 249 inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) { 250 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false); 251 } 252 inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) { 253 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false); 254 } 255 inline static address get_dest_of_b64_patchable_at(address instruction_addr) { 256 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false); 257 } 258 259 // 260 // Support for frame handling 261 // 262 263 // some ABI-related functions 264 void save_nonvolatile_gprs( Register dst_base, int offset); 265 void restore_nonvolatile_gprs(Register src_base, int offset); 266 enum { num_volatile_regs = 11 + 14 }; // GPR + FPR 267 void save_volatile_gprs( Register dst_base, int offset); 268 void restore_volatile_gprs(Register src_base, int offset); 269 void save_LR_CR( Register tmp); // tmp contains LR on return. 270 void restore_LR_CR(Register tmp); 271 272 // Get current PC using bl-next-instruction trick. 273 address get_PC_trash_LR(Register result); 274 275 // Resize current frame either relatively wrt to current SP or absolute. 276 void resize_frame(Register offset, Register tmp); 277 void resize_frame(int offset, Register tmp); 278 void resize_frame_absolute(Register addr, Register tmp1, Register tmp2); 279 280 // Push a frame of size bytes. 281 void push_frame(Register bytes, Register tmp); 282 283 // Push a frame of size `bytes'. No abi space provided. 284 void push_frame(unsigned int bytes, Register tmp); 285 286 // Push a frame of size `bytes' plus abi_reg_args on top. 287 void push_frame_reg_args(unsigned int bytes, Register tmp); 288 289 // Setup up a new C frame with a spill area for non-volatile GPRs and additional 290 // space for local variables 291 void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp); 292 293 // pop current C frame 294 void pop_frame(); 295 296 // 297 // Calls 298 // 299 300 private: 301 address _last_calls_return_pc; 302 303 #if defined(ABI_ELFv2) 304 // Generic version of a call to C function. 305 // Updates and returns _last_calls_return_pc. 306 address branch_to(Register function_entry, bool and_link); 307 #else 308 // Generic version of a call to C function via a function descriptor 309 // with variable support for C calling conventions (TOC, ENV, etc.). 310 // updates and returns _last_calls_return_pc. 311 address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call, 312 bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee); 313 #endif 314 315 public: 316 317 // Get the pc where the last call will return to. returns _last_calls_return_pc. 318 inline address last_calls_return_pc(); 319 320 #if defined(ABI_ELFv2) 321 // Call a C function via a function descriptor and use full C 322 // calling conventions. Updates and returns _last_calls_return_pc. 323 address call_c(Register function_entry); 324 // For tail calls: only branch, don't link, so callee returns to caller of this function. 325 address call_c_and_return_to_caller(Register function_entry); 326 address call_c(address function_entry, relocInfo::relocType rt); 327 #else 328 // Call a C function via a function descriptor and use full C 329 // calling conventions. Updates and returns _last_calls_return_pc. 330 address call_c(Register function_descriptor); 331 // For tail calls: only branch, don't link, so callee returns to caller of this function. 332 address call_c_and_return_to_caller(Register function_descriptor); 333 address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt); 334 address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt, 335 Register toc); 336 #endif 337 338 protected: 339 340 // It is imperative that all calls into the VM are handled via the 341 // call_VM macros. They make sure that the stack linkage is setup 342 // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points 343 // while call_VM_leaf's correspond to LEAF entry points. 344 // 345 // This is the base routine called by the different versions of 346 // call_VM. The interpreter may customize this version by overriding 347 // it for its purposes (e.g., to save/restore additional registers 348 // when doing a VM call). 349 // 350 // If no last_java_sp is specified (noreg) then SP will be used instead. 351 virtual void call_VM_base( 352 // where an oop-result ends up if any; use noreg otherwise 353 Register oop_result, 354 // to set up last_Java_frame in stubs; use noreg otherwise 355 Register last_java_sp, 356 // the entry point 357 address entry_point, 358 // flag which indicates if exception should be checked 359 bool check_exception = true 360 ); 361 362 // Support for VM calls. This is the base routine called by the 363 // different versions of call_VM_leaf. The interpreter may customize 364 // this version by overriding it for its purposes (e.g., to 365 // save/restore additional registers when doing a VM call). 366 void call_VM_leaf_base(address entry_point); 367 368 public: 369 // Call into the VM. 370 // Passes the thread pointer (in R3_ARG1) as a prepended argument. 371 // Makes sure oop return values are visible to the GC. 372 void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); 373 void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); 374 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); 375 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg3, bool check_exceptions = true); 376 void call_VM_leaf(address entry_point); 377 void call_VM_leaf(address entry_point, Register arg_1); 378 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); 379 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); 380 381 // Call a stub function via a function descriptor, but don't save 382 // TOC before call, don't setup TOC and ENV for call, and don't 383 // restore TOC after call. Updates and returns _last_calls_return_pc. 384 inline address call_stub(Register function_entry); 385 inline void call_stub_and_return_to(Register function_entry, Register return_pc); 386 387 // 388 // Java utilities 389 // 390 391 // Read from the polling page, its address is already in a register. 392 inline void load_from_polling_page(Register polling_page_address, int offset = 0); 393 // Check whether instruction is a read access to the polling page 394 // which was emitted by load_from_polling_page(..). 395 static bool is_load_from_polling_page(int instruction, void* ucontext/*may be NULL*/, 396 address* polling_address_ptr = NULL); 397 398 // Support for NULL-checks 399 // 400 // Generates code that causes a NULL OS exception if the content of reg is NULL. 401 // If the accessed location is M[reg + offset] and the offset is known, provide the 402 // offset. No explicit code generation is needed if the offset is within a certain 403 // range (0 <= offset <= page_size). 404 405 // Stack overflow checking 406 void bang_stack_with_offset(int offset); 407 408 // If instruction is a stack bang of the form ld, stdu, or 409 // stdux, return the banged address. Otherwise, return 0. 410 static address get_stack_bang_address(int instruction, void* ucontext); 411 412 // Check for reserved stack access in method being exited. If the reserved 413 // stack area was accessed, protect it again and throw StackOverflowError. 414 void reserved_stack_check(Register return_pc); 415 416 // Atomics 417 // CmpxchgX sets condition register to cmpX(current, compare). 418 // (flag == ne) => (dest_current_value != compare_value), (!swapped) 419 // (flag == eq) => (dest_current_value == compare_value), ( swapped) 420 static inline bool cmpxchgx_hint_acquire_lock() { return true; } 421 // The stxcx will probably not be succeeded by a releasing store. 422 static inline bool cmpxchgx_hint_release_lock() { return false; } 423 static inline bool cmpxchgx_hint_atomic_update() { return false; } 424 425 // Cmpxchg semantics 426 enum { 427 MemBarNone = 0, 428 MemBarRel = 1, 429 MemBarAcq = 2, 430 MemBarFenceAfter = 4 // use powers of 2 431 }; 432 private: 433 // Helper functions for word/sub-word atomics. 434 void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, 435 Register addr_base, Register tmp1, Register tmp2, Register tmp3, 436 bool cmpxchgx_hint, bool is_add, int size); 437 void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, 438 Register compare_value, Register exchange_value, 439 Register addr_base, Register tmp1, Register tmp2, 440 Label &retry, Label &failed, bool cmpxchgx_hint, int size); 441 void cmpxchg_generic(ConditionRegister flag, 442 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 443 Register tmp1, Register tmp2, 444 int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size); 445 public: 446 // Temps and addr_base are killed if processor does not support Power 8 instructions. 447 // Result will be sign extended. 448 void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base, 449 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 450 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1); 451 } 452 // Temps and addr_base are killed if processor does not support Power 8 instructions. 453 // Result will be sign extended. 454 void getandseth(Register dest_current_value, Register exchange_value, Register addr_base, 455 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 456 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2); 457 } 458 void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base, 459 bool cmpxchgx_hint) { 460 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4); 461 } 462 void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base, 463 bool cmpxchgx_hint); 464 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). 465 // Result will be sign extended. 466 void getandaddb(Register dest_current_value, Register inc_value, Register addr_base, 467 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 468 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1); 469 } 470 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). 471 // Result will be sign extended. 472 void getandaddh(Register dest_current_value, Register inc_value, Register addr_base, 473 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 474 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2); 475 } 476 void getandaddw(Register dest_current_value, Register inc_value, Register addr_base, 477 Register tmp1, bool cmpxchgx_hint) { 478 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4); 479 } 480 void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base, 481 Register tmp, bool cmpxchgx_hint); 482 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. 483 // compare_value must be at least 32 bit sign extended. Result will be sign extended. 484 void cmpxchgb(ConditionRegister flag, 485 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 486 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, 487 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 488 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, 489 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1); 490 } 491 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. 492 // compare_value must be at least 32 bit sign extended. Result will be sign extended. 493 void cmpxchgh(ConditionRegister flag, 494 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 495 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, 496 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 497 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, 498 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2); 499 } 500 void cmpxchgw(ConditionRegister flag, 501 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 502 int semantics, bool cmpxchgx_hint = false, 503 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 504 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg, 505 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4); 506 } 507 void cmpxchgd(ConditionRegister flag, 508 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, 509 Register addr_base, int semantics, bool cmpxchgx_hint = false, 510 Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false, bool weak = false); 511 512 // interface method calling 513 void lookup_interface_method(Register recv_klass, 514 Register intf_klass, 515 RegisterOrConstant itable_index, 516 Register method_result, 517 Register temp_reg, Register temp2_reg, 518 Label& no_such_interface, 519 bool return_method = true); 520 521 // virtual method calling 522 void lookup_virtual_method(Register recv_klass, 523 RegisterOrConstant vtable_index, 524 Register method_result); 525 526 // Test sub_klass against super_klass, with fast and slow paths. 527 528 // The fast path produces a tri-state answer: yes / no / maybe-slow. 529 // One of the three labels can be NULL, meaning take the fall-through. 530 // If super_check_offset is -1, the value is loaded up from super_klass. 531 // No registers are killed, except temp_reg and temp2_reg. 532 // If super_check_offset is not -1, temp2_reg is not used and can be noreg. 533 void check_klass_subtype_fast_path(Register sub_klass, 534 Register super_klass, 535 Register temp1_reg, 536 Register temp2_reg, 537 Label* L_success, 538 Label* L_failure, 539 Label* L_slow_path = NULL, // default fall through 540 RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); 541 542 // The rest of the type check; must be wired to a corresponding fast path. 543 // It does not repeat the fast path logic, so don't use it standalone. 544 // The temp_reg can be noreg, if no temps are available. 545 // It can also be sub_klass or super_klass, meaning it's OK to kill that one. 546 // Updates the sub's secondary super cache as necessary. 547 void check_klass_subtype_slow_path(Register sub_klass, 548 Register super_klass, 549 Register temp1_reg, 550 Register temp2_reg, 551 Label* L_success = NULL, 552 Register result_reg = noreg); 553 554 // Simplified, combined version, good for typical uses. 555 // Falls through on failure. 556 void check_klass_subtype(Register sub_klass, 557 Register super_klass, 558 Register temp1_reg, 559 Register temp2_reg, 560 Label& L_success); 561 562 // Method handle support (JSR 292). 563 void check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type); 564 565 RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0); 566 567 // Biased locking support 568 // Upon entry,obj_reg must contain the target object, and mark_reg 569 // must contain the target object's header. 570 // Destroys mark_reg if an attempt is made to bias an anonymously 571 // biased lock. In this case a failure will go either to the slow 572 // case or fall through with the notEqual condition code set with 573 // the expectation that the slow case in the runtime will be called. 574 // In the fall-through case where the CAS-based lock is done, 575 // mark_reg is not destroyed. 576 void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg, 577 Register temp2_reg, Label& done, Label* slow_case = NULL); 578 // Upon entry, the base register of mark_addr must contain the oop. 579 // Destroys temp_reg. 580 // If allow_delay_slot_filling is set to true, the next instruction 581 // emitted after this one will go in an annulled delay slot if the 582 // biased locking exit case failed. 583 void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done); 584 585 // allocation (for C1) 586 void eden_allocate( 587 Register obj, // result: pointer to object after successful allocation 588 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise 589 int con_size_in_bytes, // object size in bytes if known at compile time 590 Register t1, // temp register 591 Register t2, // temp register 592 Label& slow_case // continuation point if fast allocation fails 593 ); 594 void tlab_allocate( 595 Register obj, // result: pointer to object after successful allocation 596 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise 597 int con_size_in_bytes, // object size in bytes if known at compile time 598 Register t1, // temp register 599 Label& slow_case // continuation point if fast allocation fails 600 ); 601 void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2); 602 603 enum { trampoline_stub_size = 6 * 4 }; 604 address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg); 605 606 void atomic_inc_ptr(Register addr, Register result, int simm16 = 1); 607 void atomic_ori_int(Register addr, Register result, int uimm16); 608 609 #if INCLUDE_RTM_OPT 610 void rtm_counters_update(Register abort_status, Register rtm_counters); 611 void branch_on_random_using_tb(Register tmp, int count, Label& brLabel); 612 void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters, 613 Metadata* method_data); 614 void rtm_profiling(Register abort_status_Reg, Register temp_Reg, 615 RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); 616 void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, 617 Label& retryLabel, Label* checkRetry = NULL); 618 void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel); 619 void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp, 620 Register retry_on_abort_count, 621 RTMLockingCounters* stack_rtm_counters, 622 Metadata* method_data, bool profile_rtm, 623 Label& DONE_LABEL, Label& IsInflated); 624 void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box, 625 Register retry_on_busy_count, Register retry_on_abort_count, 626 RTMLockingCounters* rtm_counters, 627 Metadata* method_data, bool profile_rtm, 628 Label& DONE_LABEL); 629 #endif 630 631 void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, 632 Register tmp1, Register tmp2, Register tmp3, 633 bool try_bias = UseBiasedLocking, 634 RTMLockingCounters* rtm_counters = NULL, 635 RTMLockingCounters* stack_rtm_counters = NULL, 636 Metadata* method_data = NULL, 637 bool use_rtm = false, bool profile_rtm = false); 638 639 void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, 640 Register tmp1, Register tmp2, Register tmp3, 641 bool try_bias = UseBiasedLocking, bool use_rtm = false); 642 643 // Check if safepoint requested and if so branch 644 void safepoint_poll(Label& slow_path, Register temp_reg); 645 646 void resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame); 647 648 // Support for managing the JavaThread pointer (i.e.; the reference to 649 // thread-local information). 650 651 // Support for last Java frame (but use call_VM instead where possible): 652 // access R16_thread->last_Java_sp. 653 void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); 654 void reset_last_Java_frame(void); 655 void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1); 656 657 // Read vm result from thread: oop_result = R16_thread->result; 658 void get_vm_result (Register oop_result); 659 void get_vm_result_2(Register metadata_result); 660 661 static bool needs_explicit_null_check(intptr_t offset); 662 static bool uses_implicit_null_check(void* address); 663 664 // Trap-instruction-based checks. 665 // Range checks can be distinguished from zero checks as they check 32 bit, 666 // zero checks all 64 bits (tw, td). 667 inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual); 668 static bool is_trap_null_check(int x) { 669 return is_tdi(x, traptoEqual, -1/*any reg*/, 0) || 670 is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0); 671 } 672 673 inline void trap_zombie_not_entrant(); 674 static bool is_trap_zombie_not_entrant(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 1); } 675 676 inline void trap_should_not_reach_here(); 677 static bool is_trap_should_not_reach_here(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 2); } 678 679 inline void trap_ic_miss_check(Register a, Register b); 680 static bool is_trap_ic_miss_check(int x) { 681 return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/); 682 } 683 684 // Implicit or explicit null check, jumps to static address exception_entry. 685 inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry); 686 inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided 687 688 // Access heap oop, handle encoding and GC barriers. 689 // Some GC barriers call C so use needs_frame = true if an extra frame is needed at the current call site. 690 private: 691 inline void access_store_at(BasicType type, DecoratorSet decorators, 692 Register base, RegisterOrConstant ind_or_offs, Register val, 693 Register tmp1, Register tmp2, Register tmp3, bool needs_frame); 694 inline void access_load_at(BasicType type, DecoratorSet decorators, 695 Register base, RegisterOrConstant ind_or_offs, Register dst, 696 Register tmp1, Register tmp2, bool needs_frame, Label *L_handle_null = NULL); 697 698 public: 699 // Specify tmp1 for better code in certain compressed oops cases. Specify Label to bail out on null oop. 700 // tmp1, tmp2 and needs_frame are used with decorators ON_PHANTOM_OOP_REF or ON_WEAK_OOP_REF. 701 inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1, 702 Register tmp1, Register tmp2, bool needs_frame, 703 DecoratorSet decorators = 0, Label *L_handle_null = NULL); 704 705 inline void store_heap_oop(Register d, RegisterOrConstant offs, Register s1, 706 Register tmp1, Register tmp2, Register tmp3, bool needs_frame, 707 DecoratorSet decorators = 0); 708 709 // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. 710 // src == d allowed. 711 inline Register encode_heap_oop_not_null(Register d, Register src = noreg); 712 inline Register decode_heap_oop_not_null(Register d, Register src = noreg); 713 714 // Null allowed. 715 inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier! 716 inline void decode_heap_oop(Register d); 717 718 // Load/Store klass oop from klass field. Compress. 719 void load_klass(Register dst, Register src); 720 void store_klass(Register dst_oop, Register klass, Register tmp = R0); 721 void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. 722 723 void resolve_oop_handle(Register result); 724 void load_mirror_from_const_method(Register mirror, Register const_method); 725 726 static int instr_size_for_decode_klass_not_null(); 727 void decode_klass_not_null(Register dst, Register src = noreg); 728 Register encode_klass_not_null(Register dst, Register src = noreg); 729 730 // SIGTRAP-based range checks for arrays. 731 inline void trap_range_check_l(Register a, Register b); 732 inline void trap_range_check_l(Register a, int si16); 733 static bool is_trap_range_check_l(int x) { 734 return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 735 is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) ); 736 } 737 inline void trap_range_check_le(Register a, int si16); 738 static bool is_trap_range_check_le(int x) { 739 return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/); 740 } 741 inline void trap_range_check_g(Register a, int si16); 742 static bool is_trap_range_check_g(int x) { 743 return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/); 744 } 745 inline void trap_range_check_ge(Register a, Register b); 746 inline void trap_range_check_ge(Register a, int si16); 747 static bool is_trap_range_check_ge(int x) { 748 return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 749 is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) ); 750 } 751 static bool is_trap_range_check(int x) { 752 return is_trap_range_check_l(x) || is_trap_range_check_le(x) || 753 is_trap_range_check_g(x) || is_trap_range_check_ge(x); 754 } 755 756 void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0); 757 void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0); 758 void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); 759 760 #ifdef COMPILER2 761 // Intrinsics for CompactStrings 762 // Compress char[] to byte[] by compressing 16 bytes at once. 763 void string_compress_16(Register src, Register dst, Register cnt, 764 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 765 Label& Lfailure); 766 767 // Compress char[] to byte[]. cnt must be positive int. 768 void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure); 769 770 // Inflate byte[] to char[] by inflating 16 bytes at once. 771 void string_inflate_16(Register src, Register dst, Register cnt, 772 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); 773 774 // Inflate byte[] to char[]. cnt must be positive int. 775 void string_inflate(Register src, Register dst, Register cnt, Register tmp); 776 777 void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, 778 Register tmp1, Register result, int ae); 779 780 void array_equals(bool is_array_equ, Register ary1, Register ary2, 781 Register limit, Register tmp1, Register result, bool is_byte); 782 783 void string_indexof(Register result, Register haystack, Register haycnt, 784 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, 785 Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae); 786 787 void string_indexof_char(Register result, Register haystack, Register haycnt, 788 Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte); 789 790 void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2); 791 #endif 792 793 // Emitters for BigInteger.multiplyToLen intrinsic. 794 inline void multiply64(Register dest_hi, Register dest_lo, 795 Register x, Register y); 796 void add2_with_carry(Register dest_hi, Register dest_lo, 797 Register src1, Register src2); 798 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 799 Register y, Register y_idx, Register z, 800 Register carry, Register product_high, Register product, 801 Register idx, Register kdx, Register tmp); 802 void multiply_add_128_x_128(Register x_xstart, Register y, Register z, 803 Register yz_idx, Register idx, Register carry, 804 Register product_high, Register product, Register tmp, 805 int offset); 806 void multiply_128_x_128_loop(Register x_xstart, 807 Register y, Register z, 808 Register yz_idx, Register idx, Register carry, 809 Register product_high, Register product, 810 Register carry2, Register tmp); 811 void muladd(Register out, Register in, Register offset, Register len, Register k, 812 Register tmp1, Register tmp2, Register carry); 813 void multiply_to_len(Register x, Register xlen, 814 Register y, Register ylen, 815 Register z, Register zlen, 816 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 817 Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10, 818 Register tmp11, Register tmp12, Register tmp13); 819 820 // Emitters for CRC32 calculation. 821 // A note on invertCRC: 822 // Unfortunately, internal representation of crc differs between CRC32 and CRC32C. 823 // CRC32 holds it's current crc value in the externally visible representation. 824 // CRC32C holds it's current crc value in internal format, ready for updating. 825 // Thus, the crc value must be bit-flipped before updating it in the CRC32 case. 826 // In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()). 827 // The bool invertCRC parameter indicates whether bit-flipping is required before updates. 828 void load_reverse_32(Register dst, Register src); 829 int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3); 830 void fold_byte_crc32(Register crc, Register val, Register table, Register tmp); 831 void update_byte_crc32(Register crc, Register val, Register table); 832 void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, 833 Register data, bool loopAlignment); 834 void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, 835 Register t0, Register t1, Register t2, Register t3, 836 Register tc0, Register tc1, Register tc2, Register tc3); 837 void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, 838 Register t0, Register t1, Register t2, Register t3, 839 Register tc0, Register tc1, Register tc2, Register tc3, 840 bool invertCRC); 841 void kernel_crc32_vpmsum(Register crc, Register buf, Register len, Register constants, 842 Register t0, Register t1, Register t2, Register t3, Register t4, 843 Register t5, Register t6, bool invertCRC); 844 void kernel_crc32_vpmsum_aligned(Register crc, Register buf, Register len, Register constants, 845 Register t0, Register t1, Register t2, Register t3, Register t4, 846 Register t5, Register t6); 847 // Version which internally decides what to use. 848 void crc32(Register crc, Register buf, Register len, Register t0, Register t1, Register t2, 849 Register t3, Register t4, Register t5, Register t6, Register t7, bool is_crc32c); 850 851 void kernel_crc32_singleByteReg(Register crc, Register val, Register table, 852 bool invertCRC); 853 854 // SHA-2 auxiliary functions and public interfaces 855 private: 856 void sha256_deque(const VectorRegister src, 857 const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3); 858 void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr); 859 void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw); 860 void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws, 861 const int total_ws, const Register k, const VectorRegister* kpws, 862 const int total_kpws); 863 void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1, 864 const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0, 865 const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3, 866 const Register j, const Register k); 867 void sha256_update_sha_state(const VectorRegister a, const VectorRegister b, 868 const VectorRegister c, const VectorRegister d, const VectorRegister e, 869 const VectorRegister f, const VectorRegister g, const VectorRegister h, 870 const Register hptr); 871 872 void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws); 873 void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs); 874 void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw); 875 void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs); 876 void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1, 877 const VectorRegister w2, const VectorRegister w3, 878 const VectorRegister w4, const VectorRegister w5, 879 const VectorRegister w6, const VectorRegister w7, 880 const VectorRegister kpw0, const VectorRegister kpw1, const Register j, 881 const VectorRegister vRb, const Register k); 882 883 public: 884 void sha256(bool multi_block); 885 void sha512(bool multi_block); 886 887 888 // 889 // Debugging 890 // 891 892 // assert on cr0 893 void asm_assert(bool check_equal, const char* msg, int id); 894 void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); } 895 void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); } 896 897 private: 898 void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base, 899 const char* msg, int id); 900 901 public: 902 903 void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg, int id) { 904 asm_assert_mems_zero(true, 8, mem_offset, mem_base, msg, id); 905 } 906 void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg, int id) { 907 asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg, id); 908 } 909 910 // Verify R16_thread contents. 911 void verify_thread(); 912 913 // Emit code to verify that reg contains a valid oop if +VerifyOops is set. 914 void verify_oop(Register reg, const char* s = "broken oop"); 915 void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop"); 916 917 // TODO: verify method and klass metadata (compare against vptr?) 918 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} 919 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} 920 921 // Convenience method returning function entry. For the ELFv1 case 922 // creates function descriptor at the current address and returs 923 // the pointer to it. For the ELFv2 case returns the current address. 924 inline address function_entry(); 925 926 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) 927 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) 928 929 private: 930 931 enum { 932 stop_stop = 0, 933 stop_untested = 1, 934 stop_unimplemented = 2, 935 stop_shouldnotreachhere = 3, 936 stop_end = 4 937 }; 938 void stop(int type, const char* msg, int id); 939 940 public: 941 // Prints msg, dumps registers and stops execution. 942 void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); } 943 void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); } 944 void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); } 945 void should_not_reach_here() { stop(stop_shouldnotreachhere, "", -1); } 946 947 void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; 948 }; 949 950 // class SkipIfEqualZero: 951 // 952 // Instantiating this class will result in assembly code being output that will 953 // jump around any code emitted between the creation of the instance and it's 954 // automatic destruction at the end of a scope block, depending on the value of 955 // the flag passed to the constructor, which will be checked at run-time. 956 class SkipIfEqualZero : public StackObj { 957 private: 958 MacroAssembler* _masm; 959 Label _label; 960 961 public: 962 // 'Temp' is a temp register that this object can use (and trash). 963 explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr); 964 static void skip_to_label_if_equal_zero(MacroAssembler*, Register temp, 965 const bool* flag_addr, Label& label); 966 ~SkipIfEqualZero(); 967 }; 968 969 #endif // CPU_PPC_MACROASSEMBLER_PPC_HPP