1 /* 2 * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2017, SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_HPP 27 #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP 28 29 #include "asm/assembler.hpp" 30 #include "runtime/rtmLocking.hpp" 31 #include "utilities/macros.hpp" 32 33 // MacroAssembler extends Assembler by a few frequently used macros. 34 35 class ciTypeArray; 36 37 class MacroAssembler: public Assembler { 38 public: 39 MacroAssembler(CodeBuffer* code) : Assembler(code) {} 40 41 // 42 // Optimized instruction emitters 43 // 44 45 inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; } 46 inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); } 47 48 // load d = *[a+si31] 49 // Emits several instructions if the offset is not encodable in one instruction. 
// Both variants take the destination register d, the signed offset si31, the
// base register a and an emit_filler_nop flag (declared as int).
// NOTE(review): what distinguishes the _unchecked variant is not visible in
// this header — see the definitions.
void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
void ld_largeoffset          (Register d, int si31, Register a, int emit_filler_nop);
// Inspect code at address a. By their names these recognize a sequence emitted
// by ld_largeoffset and read back its offset — confirm in the inline definitions.
inline static bool is_ld_largeoffset(address a);
inline static int get_ld_largeoffset_offset(address a);

// NOTE(review): presumably rounds register r to a multiple of modulus; the
// direction of rounding is not visible here — confirm in the inline definition.
inline void round_to(Register r, int modulus);

// Load/store with type given by parameter.
// The access width is size_in_bytes; is_signed is only taken by the load.
// NOTE(review): store_sized_value names its first parameter dst although for a
// store it is presumably the value being stored — confirm.
void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);

// Move register if destination register and target register are different
inline void mr_if_needed(Register rd, Register rs);
inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
// This is dedicated for emitting scheduled mach nodes. For better
// readability of the ad file I put it here.
// Endgroups are not needed if
//  - the scheduler is off
//  - the scheduler found that there is a natural group end, in that
//    case it reduced the size of the instruction used in the test
//    yielding 'needed'.
inline void endgroup_if_needed(bool needed);

// Memory barriers.
inline void membar(int bits);
inline void release();
inline void acquire();
inline void fence();

// nop padding
// Defaults: max = 252, rem = 0.
// NOTE(review): the exact meaning of max and rem is not visible in this header.
void align(int modulus, int max = 252, int rem = 0);

//
// Constants, loading constants, TOC support
//

// Address of the global TOC.
inline static address global_toc();
// Offset of given address to the global TOC.
inline static int offset_to_global_toc(const address addr);

// Address of TOC of the current method.
inline address method_toc();
// Offset of given address to TOC of the current method.
inline int offset_to_method_toc(const address addr);

// Global TOC.
97 void calculate_address_from_global_toc(Register dst, address addr, 98 bool hi16 = true, bool lo16 = true, 99 bool add_relocation = true, bool emit_dummy_addr = false); 100 inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) { 101 calculate_address_from_global_toc(dst, addr, true, false); 102 }; 103 inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) { 104 calculate_address_from_global_toc(dst, addr, false, true); 105 }; 106 107 inline static bool is_calculate_address_from_global_toc_at(address a, address bound); 108 // Returns address of first instruction in sequence. 109 static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr); 110 static address get_address_of_calculate_address_from_global_toc_at(address a, address addr); 111 112 #ifdef _LP64 113 // Patch narrow oop constant. 114 inline static bool is_set_narrow_oop(address a, address bound); 115 // Returns address of first instruction in sequence. 116 static address patch_set_narrow_oop(address a, address bound, narrowOop data); 117 static narrowOop get_narrow_oop(address a, address bound); 118 #endif 119 120 inline static bool is_load_const_at(address a); 121 122 // Emits an oop const to the constant pool, loads the constant, and 123 // sets a relocation info with address current_pc. 124 // Returns true if successful. 125 bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false); 126 127 static bool is_load_const_from_method_toc_at(address a); 128 static int get_offset_of_load_const_from_method_toc_at(address a); 129 130 // Get the 64 bit constant from a `load_const' sequence. 131 static long get_const(address load_const); 132 133 // Patch the 64 bit constant of a `load_const' sequence. This is a 134 // low level procedure. It neither flushes the instruction cache nor 135 // is it atomic. 
static void patch_const(address load_const, long x);

// Metadata in code that we have to keep track of.
AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
AddressLiteral constant_metadata_address(Metadata* obj); // find_index
// Oops used directly in compiled code are stored in the constant pool,
// and loaded from there.
// Allocate new entry for oop in constant pool. Generate relocation.
AddressLiteral allocate_oop_address(jobject obj);
// Find oop obj in constant pool. Return relocation with its index.
AddressLiteral constant_oop_address(jobject obj);

// Find oop in constant pool and emit instructions to load it.
// Uses constant_oop_address.
inline void set_oop_constant(jobject obj, Register d);
// Same as load_address.
inline void set_oop         (AddressLiteral obj_addr, Register d);

// Read runtime constant: Issue load if constant not yet established,
// else use real constant.
// Declared virtual; the result is a RegisterOrConstant, matching the two cases
// described above.
virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                              Register tmp,
                                              int offset);

//
// branch, jump
//

inline void pd_patch_instruction(address branch, address target);
// Wrapped in NOT_PRODUCT(...).
NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)

// Conditional far branch for destinations encodable in 24+2 bits.
// Same interface as bc, e.g. no inverse boint-field.
// Values for the `optimize' argument of bc_far.
enum {
  bc_far_optimize_not         = 0,
  bc_far_optimize_on_relocate = 1
};
// optimize: flag for telling the conditional far branch to optimize
// itself when relocated.
void bc_far(int boint, int biint, Label& dest, int optimize);
void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
// Relocation of conditional far branches.
// Recognize a conditional far branch at instruction_addr and read / rewrite
// its destination.
static bool    is_bc_far_at(address instruction_addr);
static address get_dest_of_bc_far_at(address instruction_addr);
static void    set_dest_of_bc_far_at(address instruction_addr, address dest);
private:
// One predicate per bc_far variant; the variants themselves are not described
// in this header.
// NOTE(review): `static bool inline' is an unusual specifier order; it is
// equivalent to `static inline bool'.
static bool inline is_bc_far_variant1_at(address instruction_addr);
static bool inline is_bc_far_variant2_at(address instruction_addr);
static bool inline is_bc_far_variant3_at(address instruction_addr);
public:

// Convenience bc_far versions.
// Each takes the condition register field crx, the target label L and the
// `optimize' flag.
inline void blt_far(ConditionRegister crx, Label& L, int optimize);
inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
inline void beq_far(ConditionRegister crx, Label& L, int optimize);
inline void bso_far(ConditionRegister crx, Label& L, int optimize);
inline void bge_far(ConditionRegister crx, Label& L, int optimize);
inline void ble_far(ConditionRegister crx, Label& L, int optimize);
inline void bne_far(ConditionRegister crx, Label& L, int optimize);
inline void bns_far(ConditionRegister crx, Label& L, int optimize);

// Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump.
private:
// Size of the patchable sequence: 2 + 3 + 1 + 1 = 7 instruction words.
// The return address offset equals the size of the whole sequence.
enum {
  bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*5load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
  bxx64_patchable_size              = bxx64_patchable_instruction_count * BytesPerInstWord,
  bxx64_patchable_ret_addr_offset   = bxx64_patchable_size
};
// link selects between the call (true) and the jump (false) flavor; see the
// bl64_patchable / b64_patchable wrappers.
void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
static bool is_bxx64_patchable_at(            address instruction_addr, bool link);
// Does the instruction use a pc-relative encoding of the destination?
static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link);
static bool is_bxx64_patchable_variant1_at(   address instruction_addr, bool link);
// Load destination relative to global toc.
210 static bool is_bxx64_patchable_variant1b_at( address instruction_addr, bool link); 211 static bool is_bxx64_patchable_variant2_at( address instruction_addr, bool link); 212 static void set_dest_of_bxx64_patchable_at( address instruction_addr, address target, bool link); 213 static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link); 214 215 public: 216 // call 217 enum { 218 bl64_patchable_instruction_count = bxx64_patchable_instruction_count, 219 bl64_patchable_size = bxx64_patchable_size, 220 bl64_patchable_ret_addr_offset = bxx64_patchable_ret_addr_offset 221 }; 222 inline void bl64_patchable(address target, relocInfo::relocType rt) { 223 bxx64_patchable(target, rt, /*link=*/true); 224 } 225 inline static bool is_bl64_patchable_at(address instruction_addr) { 226 return is_bxx64_patchable_at(instruction_addr, /*link=*/true); 227 } 228 inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) { 229 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true); 230 } 231 inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) { 232 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true); 233 } 234 inline static address get_dest_of_bl64_patchable_at(address instruction_addr) { 235 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true); 236 } 237 // jump 238 enum { 239 b64_patchable_instruction_count = bxx64_patchable_instruction_count, 240 b64_patchable_size = bxx64_patchable_size, 241 }; 242 inline void b64_patchable(address target, relocInfo::relocType rt) { 243 bxx64_patchable(target, rt, /*link=*/false); 244 } 245 inline static bool is_b64_patchable_at(address instruction_addr) { 246 return is_bxx64_patchable_at(instruction_addr, /*link=*/false); 247 } 248 inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) { 249 return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false); 250 } 251 inline static void 
set_dest_of_b64_patchable_at(address instruction_addr, address target) { 252 set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false); 253 } 254 inline static address get_dest_of_b64_patchable_at(address instruction_addr) { 255 return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false); 256 } 257 258 // 259 // Support for frame handling 260 // 261 262 // some ABI-related functions 263 void save_nonvolatile_gprs( Register dst_base, int offset); 264 void restore_nonvolatile_gprs(Register src_base, int offset); 265 enum { num_volatile_regs = 11 + 14 }; // GPR + FPR 266 void save_volatile_gprs( Register dst_base, int offset); 267 void restore_volatile_gprs(Register src_base, int offset); 268 void save_LR_CR( Register tmp); // tmp contains LR on return. 269 void restore_LR_CR(Register tmp); 270 271 // Get current PC using bl-next-instruction trick. 272 address get_PC_trash_LR(Register result); 273 274 // Resize current frame either relatively wrt to current SP or absolute. 275 void resize_frame(Register offset, Register tmp); 276 void resize_frame(int offset, Register tmp); 277 void resize_frame_absolute(Register addr, Register tmp1, Register tmp2); 278 279 // Push a frame of size bytes. 280 void push_frame(Register bytes, Register tmp); 281 282 // Push a frame of size `bytes'. No abi space provided. 283 void push_frame(unsigned int bytes, Register tmp); 284 285 // Push a frame of size `bytes' plus abi_reg_args on top. 286 void push_frame_reg_args(unsigned int bytes, Register tmp); 287 288 // Setup up a new C frame with a spill area for non-volatile GPRs and additional 289 // space for local variables 290 void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp); 291 292 // pop current C frame 293 void pop_frame(); 294 295 // 296 // Calls 297 // 298 299 private: 300 address _last_calls_return_pc; 301 302 #if defined(ABI_ELFv2) 303 // Generic version of a call to C function. 304 // Updates and returns _last_calls_return_pc. 
address branch_to(Register function_entry, bool and_link);
#else
// Generic version of a call to C function via a function descriptor
// with variable support for C calling conventions (TOC, ENV, etc.).
// updates and returns _last_calls_return_pc.
// The four trailing flags select, one by one, whether the TOC is saved before
// and restored after the call and whether the callee's TOC and ENV are loaded.
address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
                  bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
#endif

public:

// Get the pc where the last call will return to. returns _last_calls_return_pc.
inline address last_calls_return_pc();

#if defined(ABI_ELFv2)
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
// NOTE(review): in this ABI_ELFv2 branch the parameters are named
// function_entry, not function_descriptor; the "function descriptor" wording
// looks copied from the #else branch — confirm.
address call_c(Register function_entry);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_entry);
address call_c(address function_entry, relocInfo::relocType rt);
#else
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address call_c(Register function_descriptor);
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address call_c_and_return_to_caller(Register function_descriptor);
address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
                         Register toc);
#endif

protected:

// It is imperative that all calls into the VM are handled via the
// call_VM macros. They make sure that the stack linkage is setup
// correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
// while call_VM_leaf's correspond to LEAF entry points.
//
// This is the base routine called by the different versions of
// call_VM. The interpreter may customize this version by overriding
// it for its purposes (e.g., to save/restore additional registers
// when doing a VM call).
//
// If no last_java_sp is specified (noreg) then SP will be used instead.
virtual void call_VM_base(
  // where an oop-result ends up if any; use noreg otherwise
  Register        oop_result,
  // to set up last_Java_frame in stubs; use noreg otherwise
  Register        last_java_sp,
  // the entry point
  address         entry_point,
  // flag which indicates if exception should be checked (defaults to true)
  bool            check_exception = true
);

// Support for VM calls. This is the base routine called by the
// different versions of call_VM_leaf. The interpreter may customize
// this version by overriding it for its purposes (e.g., to
// save/restore additional registers when doing a VM call).
// NOTE(review): unlike call_VM_base this declaration is not marked virtual;
// unless a base class declares it virtual, a subclass version would hide
// rather than override it — confirm.
void call_VM_leaf_base(address entry_point);

public:
// Call into the VM.
// Passes the thread pointer (in R3_ARG1) as a prepended argument.
// Makes sure oop return values are visible to the GC.
371 void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); 372 void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); 373 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); 374 void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg3, bool check_exceptions = true); 375 void call_VM_leaf(address entry_point); 376 void call_VM_leaf(address entry_point, Register arg_1); 377 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); 378 void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); 379 380 // Call a stub function via a function descriptor, but don't save 381 // TOC before call, don't setup TOC and ENV for call, and don't 382 // restore TOC after call. Updates and returns _last_calls_return_pc. 383 inline address call_stub(Register function_entry); 384 inline void call_stub_and_return_to(Register function_entry, Register return_pc); 385 386 // 387 // Java utilities 388 // 389 390 // Read from the polling page, its address is already in a register. 391 inline void load_from_polling_page(Register polling_page_address, int offset = 0); 392 // Check whether instruction is a read access to the polling page 393 // which was emitted by load_from_polling_page(..). 394 static bool is_load_from_polling_page(int instruction, void* ucontext/*may be NULL*/, 395 address* polling_address_ptr = NULL); 396 397 // Check whether instruction is a write access to the memory 398 // serialization page realized by one of the instructions stw, stwu, 399 // stwx, or stwux. 400 static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext); 401 402 // Support for NULL-checks 403 // 404 // Generates code that causes a NULL OS exception if the content of reg is NULL. 
// If the accessed location is M[reg + offset] and the offset is known, provide the
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).

// Stack overflow checking
void bang_stack_with_offset(int offset);

// If instruction is a stack bang of the form ld, stdu, or
// stdux, return the banged address. Otherwise, return 0.
static address get_stack_bang_address(int instruction, void* ucontext);

// Check for reserved stack access in method being exited. If the reserved
// stack area was accessed, protect it again and throw StackOverflowError.
void reserved_stack_check(Register return_pc);

// Atomics
// CmpxchgX sets condition register to cmpX(current, compare).
// (flag == ne) => (dest_current_value != compare_value), (!swapped)
// (flag == eq) => (dest_current_value == compare_value), ( swapped)
//
// Constant hint values: true for lock acquisition, false for lock release and
// for plain atomic updates.
// NOTE(review): presumably these are what callers pass as the cmpxchgx_hint
// argument of the atomic operations — confirm.
static inline bool cmpxchgx_hint_acquire_lock()  { return true; }
// The stxcx will probably not be succeeded by a releasing store.
static inline bool cmpxchgx_hint_release_lock()  { return false; }
static inline bool cmpxchgx_hint_atomic_update() { return false; }

// Cmpxchg semantics
// Distinct bits (0, 1, 2, 4).
// NOTE(review): presumably combined with | and passed as the `semantics'
// argument of the cmpxchg functions — confirm.
enum {
  MemBarNone       = 0,
  MemBarRel        = 1,
  MemBarAcq        = 2,
  MemBarFenceAfter = 4 // use powers of 2
};
private:
// Helper functions for word/sub-word atomics.
438 void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, 439 Register addr_base, Register tmp1, Register tmp2, Register tmp3, 440 bool cmpxchgx_hint, bool is_add, int size); 441 void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, 442 Register compare_value, Register exchange_value, 443 Register addr_base, Register tmp1, Register tmp2, 444 Label &retry, Label &failed, bool cmpxchgx_hint, int size); 445 void cmpxchg_generic(ConditionRegister flag, 446 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 447 Register tmp1, Register tmp2, 448 int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size); 449 public: 450 // Temps and addr_base are killed if processor does not support Power 8 instructions. 451 // Result will be sign extended. 452 void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base, 453 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 454 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1); 455 } 456 // Temps and addr_base are killed if processor does not support Power 8 instructions. 457 // Result will be sign extended. 
458 void getandseth(Register dest_current_value, Register exchange_value, Register addr_base, 459 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 460 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2); 461 } 462 void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base, 463 bool cmpxchgx_hint) { 464 atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4); 465 } 466 void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base, 467 bool cmpxchgx_hint); 468 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). 469 // Result will be sign extended. 470 void getandaddb(Register dest_current_value, Register inc_value, Register addr_base, 471 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 472 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1); 473 } 474 // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed). 475 // Result will be sign extended. 
476 void getandaddh(Register dest_current_value, Register inc_value, Register addr_base, 477 Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) { 478 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2); 479 } 480 void getandaddw(Register dest_current_value, Register inc_value, Register addr_base, 481 Register tmp1, bool cmpxchgx_hint) { 482 atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4); 483 } 484 void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base, 485 Register tmp, bool cmpxchgx_hint); 486 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. 487 // compare_value must be at least 32 bit sign extended. Result will be sign extended. 488 void cmpxchgb(ConditionRegister flag, 489 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 490 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, 491 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 492 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, 493 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1); 494 } 495 // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions. 496 // compare_value must be at least 32 bit sign extended. Result will be sign extended. 
497 void cmpxchgh(ConditionRegister flag, 498 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 499 Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false, 500 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 501 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2, 502 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2); 503 } 504 void cmpxchgw(ConditionRegister flag, 505 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, 506 int semantics, bool cmpxchgx_hint = false, 507 Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) { 508 cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg, 509 semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4); 510 } 511 void cmpxchgd(ConditionRegister flag, 512 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, 513 Register addr_base, int semantics, bool cmpxchgx_hint = false, 514 Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false, bool weak = false); 515 516 // interface method calling 517 void lookup_interface_method(Register recv_klass, 518 Register intf_klass, 519 RegisterOrConstant itable_index, 520 Register method_result, 521 Register temp_reg, Register temp2_reg, 522 Label& no_such_interface); 523 524 // virtual method calling 525 void lookup_virtual_method(Register recv_klass, 526 RegisterOrConstant vtable_index, 527 Register method_result); 528 529 // Test sub_klass against super_klass, with fast and slow paths. 530 531 // The fast path produces a tri-state answer: yes / no / maybe-slow. 532 // One of the three labels can be NULL, meaning take the fall-through. 533 // If super_check_offset is -1, the value is loaded up from super_klass. 
// No registers are killed, except temp1_reg and temp2_reg.
// If super_check_offset is not -1, temp2_reg is not used and can be noreg.
void check_klass_subtype_fast_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp1_reg,
                                   Register temp2_reg,
                                   Label* L_success,
                                   Label* L_failure,
                                   Label* L_slow_path = NULL, // default fall through
                                   RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg can be noreg, if no temps are available.
// It can also be sub_klass or super_klass, meaning it's OK to kill that one.
// Updates the sub's secondary super cache as necessary.
// NOTE(review): the declaration has temp1_reg and temp2_reg; which one the
// "temp_reg" sentences above refer to is not visible here — confirm.
void check_klass_subtype_slow_path(Register sub_klass,
                                   Register super_klass,
                                   Register temp1_reg,
                                   Register temp2_reg,
                                   Label* L_success = NULL,
                                   Register result_reg = noreg);

// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
                         Register super_klass,
                         Register temp1_reg,
                         Register temp2_reg,
                         Label& L_success);

// Method handle support (JSR 292).
void check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type);

// extra_slot_offset defaults to 0.
RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);

// Biased locking support
// Upon entry, obj_reg must contain the target object, and mark_reg
// must contain the target object's header.
// Destroys mark_reg if an attempt is made to bias an anonymously
// biased lock. In this case a failure will go either to the slow
// case or fall through with the notEqual condition code set with
// the expectation that the slow case in the runtime will be called.
// In the fall-through case where the CAS-based lock is done,
// mark_reg is not destroyed.
// slow_case is optional (defaults to NULL).
void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg,
                          Register temp2_reg, Label& done, Label* slow_case = NULL);
// Upon entry, the base register of mark_addr must contain the oop.
// Destroys temp_reg.
// NOTE(review): the original comment continues "If allow_delay_slot_filling is
// set to true, the next instruction emitted after this one will go in an
// annulled delay slot if the biased locking exit case failed." This
// declaration has no allow_delay_slot_filling parameter, so that sentence
// does not match the signature below.
void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);

// allocation (for C1)
void eden_allocate(
  Register obj,                      // result: pointer to object after successful allocation
  Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,        // object size in bytes if known at compile time
  Register t1,                       // temp register
  Register t2,                       // temp register
  Label&   slow_case                 // continuation point if fast allocation fails
);
void tlab_allocate(
  Register obj,                      // result: pointer to object after successful allocation
  Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
  int      con_size_in_bytes,        // object size in bytes if known at compile time
  Register t1,                       // temp register
  Label&   slow_case                 // continuation point if fast allocation fails
);
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2);

// 6 * 4 = 24.
// NOTE(review): presumably 6 instructions of 4 bytes each — confirm.
enum { trampoline_stub_size = 6 * 4 };
address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);

// simm16 defaults to 1, i.e. a plain increment.
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
void atomic_ori_int(Register addr, Register result, int uimm16);

// RTM support; only declared when INCLUDE_RTM_OPT is set.
#if INCLUDE_RTM_OPT
void rtm_counters_update(Register abort_status, Register rtm_counters);
void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
                                 Metadata* method_data);
void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
                   RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
                             Label& retryLabel, Label* checkRetry = NULL);
void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
                       Register retry_on_abort_count,
                       RTMLockingCounters* stack_rtm_counters,
                       Metadata* method_data, bool profile_rtm,
                       Label& DONE_LABEL, Label& IsInflated);
void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
                          Register retry_on_busy_count, Register retry_on_abort_count,
                          RTMLockingCounters* rtm_counters,
                          Metadata* method_data, bool profile_rtm,
                          Label& DONE_LABEL);
#endif

// try_bias defaults to UseBiasedLocking. The RTM arguments default to
// NULL / false.
void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
                               Register tmp1, Register tmp2, Register tmp3,
                               bool try_bias = UseBiasedLocking,
                               RTMLockingCounters* rtm_counters = NULL,
                               RTMLockingCounters* stack_rtm_counters = NULL,
                               Metadata* method_data = NULL,
                               bool use_rtm = false, bool profile_rtm = false);

void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                 Register tmp1, Register tmp2, Register tmp3,
                                 bool try_bias = UseBiasedLocking, bool use_rtm = false);

// Support for serializing memory accesses between threads
void serialize_memory(Register thread, Register tmp1, Register tmp2);

void safepoint_poll(Label& slow_path, Register temp_reg);

// GC barrier support.
void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);

void resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame);

// G1 barriers; only declared when INCLUDE_ALL_GCS is set.
#if INCLUDE_ALL_GCS
// General G1 pre-barrier generator.
void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
                          Register Rtmp1, Register Rtmp2, bool needs_frame = false);
// General G1 post-barrier generator
void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1,
                           Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL);
#endif

// Support for managing the JavaThread pointer (i.e.; the reference to
// thread-local information).

// Support for last Java frame (but use call_VM instead where possible):
// access R16_thread->last_Java_sp.
void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
void reset_last_Java_frame(void);
void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);

// Read vm result from thread: oop_result = R16_thread->result;
void get_vm_result  (Register oop_result);
void get_vm_result_2(Register metadata_result);

static bool needs_explicit_null_check(intptr_t offset);

// Trap-instruction-based checks.
// Range checks can be distinguished from zero checks as they check 32 bit,
// zero checks all 64 bits (tw, td).
685 inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual); 686 static bool is_trap_null_check(int x) { 687 return is_tdi(x, traptoEqual, -1/*any reg*/, 0) || 688 is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0); 689 } 690 691 inline void trap_zombie_not_entrant(); 692 static bool is_trap_zombie_not_entrant(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 1); } 693 694 inline void trap_should_not_reach_here(); 695 static bool is_trap_should_not_reach_here(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 2); } 696 697 inline void trap_ic_miss_check(Register a, Register b); 698 static bool is_trap_ic_miss_check(int x) { 699 return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/); 700 } 701 702 // Implicit or explicit null check, jumps to static address exception_entry. 703 inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry); 704 inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided 705 706 // Load heap oop and decompress. Loaded oop may not be null. 707 // Specify tmp to save one cycle. 708 inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg, 709 Register tmp = noreg); 710 // Store heap oop and decompress. Decompressed oop may not be null. 711 // Specify tmp register if d should not be changed. 712 inline void store_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1, 713 Register tmp = noreg); 714 715 // Null allowed. 716 inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL); 717 718 // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. 719 // src == d allowed. 720 inline Register encode_heap_oop_not_null(Register d, Register src = noreg); 721 inline Register decode_heap_oop_not_null(Register d, Register src = noreg); 722 723 // Null allowed. 
724 inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier! 725 inline void decode_heap_oop(Register d); 726 727 // Load/Store klass oop from klass field. Compress. 728 void load_klass(Register dst, Register src); 729 void store_klass(Register dst_oop, Register klass, Register tmp = R0); 730 void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. 731 732 void resolve_oop_handle(Register result); 733 void load_mirror_from_const_method(Register mirror, Register const_method); 734 735 static int instr_size_for_decode_klass_not_null(); 736 void decode_klass_not_null(Register dst, Register src = noreg); 737 Register encode_klass_not_null(Register dst, Register src = noreg); 738 739 // SIGTRAP-based range checks for arrays. 740 inline void trap_range_check_l(Register a, Register b); 741 inline void trap_range_check_l(Register a, int si16); 742 static bool is_trap_range_check_l(int x) { 743 return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 744 is_twi(x, traptoLessThanUnsigned, -1/*any reg*/) ); 745 } 746 inline void trap_range_check_le(Register a, int si16); 747 static bool is_trap_range_check_le(int x) { 748 return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/); 749 } 750 inline void trap_range_check_g(Register a, int si16); 751 static bool is_trap_range_check_g(int x) { 752 return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/); 753 } 754 inline void trap_range_check_ge(Register a, Register b); 755 inline void trap_range_check_ge(Register a, int si16); 756 static bool is_trap_range_check_ge(int x) { 757 return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) || 758 is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/) ); 759 } 760 static bool is_trap_range_check(int x) { 761 return is_trap_range_check_l(x) || is_trap_range_check_le(x) || 762 is_trap_range_check_g(x) || is_trap_range_check_ge(x); 763 
} 764 765 void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0); 766 void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0); 767 void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); 768 769 #ifdef COMPILER2 770 // Intrinsics for CompactStrings 771 // Compress char[] to byte[] by compressing 16 bytes at once. 772 void string_compress_16(Register src, Register dst, Register cnt, 773 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 774 Label& Lfailure); 775 776 // Compress char[] to byte[]. cnt must be positive int. 777 void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure); 778 779 // Inflate byte[] to char[] by inflating 16 bytes at once. 780 void string_inflate_16(Register src, Register dst, Register cnt, 781 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); 782 783 // Inflate byte[] to char[]. cnt must be positive int. 784 void string_inflate(Register src, Register dst, Register cnt, Register tmp); 785 786 void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, 787 Register tmp1, Register result, int ae); 788 789 void array_equals(bool is_array_equ, Register ary1, Register ary2, 790 Register limit, Register tmp1, Register result, bool is_byte); 791 792 void string_indexof(Register result, Register haystack, Register haycnt, 793 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, 794 Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae); 795 796 void string_indexof_char(Register result, Register haystack, Register haycnt, 797 Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte); 798 799 void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2); 800 #endif 801 802 // Emitters for BigInteger.multiplyToLen intrinsic. 
inline void multiply64(Register dest_hi, Register dest_lo,
                         Register x, Register y);
  // NOTE(review): the register-pair naming (dest_hi, dest_lo) suggests a
  // double-width accumulation — confirm against the definitions.
  void add2_with_carry(Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product_high, Register product,
                             Register idx, Register kdx, Register tmp);
  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
                              Register yz_idx, Register idx, Register carry,
                              Register product_high, Register product, Register tmp,
                              int offset);
  void multiply_128_x_128_loop(Register x_xstart,
                               Register y, Register z,
                               Register yz_idx, Register idx, Register carry,
                               Register product_high, Register product,
                               Register carry2, Register tmp);
  void muladd(Register out, Register in, Register offset, Register len, Register k,
              Register tmp1, Register tmp2, Register carry);
  // Takes the x, y and z operands with their lengths plus thirteen temp
  // registers (tmp1..tmp13).
  void multiply_to_len(Register x, Register xlen,
                       Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                       Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
                       Register tmp11, Register tmp12, Register tmp13);

  // Emitters for CRC32 calculation.
  // A note on invertCRC:
  //   Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
  //   CRC32 holds its current crc value in the externally visible representation.
  //   CRC32C holds its current crc value in internal format, ready for updating.
  //   Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
  //   In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
  //   The bool invertCRC parameter indicates whether bit-flipping is required before updates.
void load_reverse_32(Register dst, Register src);
  // Table-driven CRC32 building blocks; `table` is the lookup-table register
  // and tc0..tc3 are table-column registers (per the parameter names).
  int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
  void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
  void fold_8bit_crc32(Register crc, Register table, Register tmp);
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
                             Register data, bool loopAlignment);
  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  // CRC32 kernels. Each variant that takes invertCRC uses it to indicate
  // whether the crc value has to be bit-flipped before updates.
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,
                          bool invertCRC);
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3,
                          bool invertCRC);
  void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          bool invertCRC);
  void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
                          Register constants, Register barretConstants,
                          Register t0, Register t1, Register t2, Register t3, Register t4,
                          bool invertCRC);
  // Unlike the other kernels, this one takes neither a table register nor an
  // invertCRC flag.
  void kernel_crc32_1word_aligned(Register crc, Register buf, Register len,
                          Register constants, Register barretConstants,
                          Register t0, Register t1, Register t2);

  void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
                          bool invertCRC);
  void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                          bool invertCRC);

  // SHA-2 auxiliary functions and public interfaces
 private:
  // SHA-256 helpers (private; only callable from within MacroAssembler).
  void sha256_deque(const VectorRegister src,
      const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
  void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
  // h_cnt is passed by non-const reference, so the callee may update it.
  void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
  void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
      const int total_ws, const Register k, const VectorRegister* kpws,
      const int total_kpws);
  void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
      const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
      const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
      const Register j, const Register k);
  void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
      const VectorRegister c, const VectorRegister d, const VectorRegister e,
      const VectorRegister f, const VectorRegister g, const VectorRegister h,
      const Register hptr);

  // SHA-512 helpers (private; only callable from within MacroAssembler).
  void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
  void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
  // h_cnt is passed by non-const reference, so the callee may update it.
  void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
  void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
  void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
      const VectorRegister w2, const VectorRegister w3,
      const VectorRegister w4, const VectorRegister w5,
      const VectorRegister w6, const VectorRegister w7,
      const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
      const VectorRegister vRb, const Register k);

 public:
  // Public SHA-2 entry points.
  // NOTE(review): multi_block presumably selects processing of several input
  // blocks per call — confirm against the definition.
  void sha256(bool multi_block);
  void sha512(bool multi_block);


  //
  // Debugging
  //

909 // assert on cr0 910 void asm_assert(bool check_equal, const char* msg, int id); 911 void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); } 912 void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); } 913 914 private: 915 void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base, 916 const char* msg, int id); 917 918 public: 919 920 void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg, int id) { 921 asm_assert_mems_zero(true, 8, mem_offset, mem_base, msg, id); 922 } 923 void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg, int id) { 924 asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg, id); 925 } 926 927 // Verify R16_thread contents. 928 void verify_thread(); 929 930 // Emit code to verify that reg contains a valid oop if +VerifyOops is set. 931 void verify_oop(Register reg, const char* s = "broken oop"); 932 void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop"); 933 934 // TODO: verify method and klass metadata (compare against vptr?) 935 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} 936 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} 937 938 // Convenience method returning function entry. For the ELFv1 case 939 // creates function descriptor at the current address and returs 940 // the pointer to it. For the ELFv2 case returns the current address. 
941 inline address function_entry(); 942 943 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) 944 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) 945 946 private: 947 948 enum { 949 stop_stop = 0, 950 stop_untested = 1, 951 stop_unimplemented = 2, 952 stop_shouldnotreachhere = 3, 953 stop_end = 4 954 }; 955 void stop(int type, const char* msg, int id); 956 957 public: 958 // Prints msg, dumps registers and stops execution. 959 void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); } 960 void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); } 961 void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); } 962 void should_not_reach_here() { stop(stop_shouldnotreachhere, "", -1); } 963 964 void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN; 965 }; 966 967 // class SkipIfEqualZero: 968 // 969 // Instantiating this class will result in assembly code being output that will 970 // jump around any code emitted between the creation of the instance and it's 971 // automatic destruction at the end of a scope block, depending on the value of 972 // the flag passed to the constructor, which will be checked at run-time. 973 class SkipIfEqualZero : public StackObj { 974 private: 975 MacroAssembler* _masm; 976 Label _label; 977 978 public: 979 // 'Temp' is a temp register that this object can use (and trash). 980 explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr); 981 ~SkipIfEqualZero(); 982 }; 983 984 #endif // CPU_PPC_VM_MACROASSEMBLER_PPC_HPP