//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// NOTE: every reg_def below also passes a fifth argument, the VMReg that
// corresponds to the register (e.g. rbx->as_VMReg()).
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// NOTE(review): the definitions below use SOC as the (first) register save
// type for EBX, ESI and EDI; SOE appears only as their C convention save type.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// (Both halves of FPR0 are therefore bound to VMRegImpl::Bad().)
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Consequently FPRn (encoding n) is bound to as_FloatRegister(n-1); the "H"
// half of each pair is the VMReg slot that follows the "L" half.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
// (they reuse encoding 7 and occupy the eight VMReg slots next(2)..next(9)
// counted from FPR7L's VMReg).
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, FPR6L, FPR6H, FPR7L, FPR7H, FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();
// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag
// PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95. // It is also safe for use by tailjumps (we don't want to allocate in ebp). // Used also if the PreserveFramePointer flag is true. reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); // Dynamic register class that selects between int_reg and int_reg_no_ebp. reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); // Class of "X" registers reg_class int_x_reg(EBX, ECX, EDX, EAX); // Class of registers that can appear in an address with no offset. // EBP and ESP require an extra instruction byte for zero offset. // Used in fast-unlock reg_class p_reg(EDX, EDI, ESI, EBX); // Class for general registers excluding ECX reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); // Class for general registers excluding ECX (and EBP) reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); // Class for general registers excluding EAX reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); // Class for general registers excluding EAX and EBX. reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); // Class for general registers excluding EAX and EBX (and EBP) reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. 
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer (disabled: no EIP reg_def exists)
// reg_class ip_reg(EIP);
// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding the EBP,EDI pair)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and
// long_reg_with_ebp; the predicate is the PreserveFramePointer flag.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP either), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and
// nadx_reg_with_ebp; the predicate is the PreserveFramePointer flag.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
// Single-precision values use only the "L" half of each FPR pair.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
// Double-precision values use adjacent L/H pairs.
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
// Singleton classes pinning a value to FPR1 (float / double) or FPR2 (double).
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
// Every double pair except FPR1.
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Shorthand used throughout this file: "__ foo()" expands to "_masm.foo()",
// so a MacroAssembler named _masm must be in scope at each use.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Rounds 'adr' DOWN to the nearest 16-byte boundary, stores 'lo' and 'hi' as
// the two 64-bit halves of a 128-bit operand at that address, and returns the
// aligned address. Because the address is rounded down, the caller must own
// the (up to 15) bytes that precede 'adr'.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
// The extra leading 128 bits absorb the round-down done by double_quadword().
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); // Offset hacking within calls. static int pre_call_resets_size() { int size = 0; Compile* C = Compile::current(); if (C->in_24_bit_fp_mode()) { size += 6; // fldcw } if (C->max_vector_size() > 16) { size += 3; // vzeroupper } return size; } // !!!!! Special hack to get all type of calls to specify the byte offset // from the start of the call to the point where the return address // will point. int MachCallStaticJavaNode::ret_addr_offset() { return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points } int MachCallDynamicJavaNode::ret_addr_offset() { return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points } static int sizeof_FFree_Float_Stack_All = -1; int MachCallRuntimeNode::ret_addr_offset() { assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); } // Indicate if the safepoint node needs the polling page as an input. // Since x86 does have absolute addressing, it doesn't. bool SafePointNode::needs_polling_address_input() { return false; } // // Compute padding required for nodes which need alignment // // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. 
int CallStaticJavaDirectNode::compute_padding(int current_offset) const { current_offset += pre_call_resets_size(); // skip fldcw, if any current_offset += 1; // skip call opcode byte return round_to(current_offset, alignment_required()) - current_offset; } // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { current_offset += pre_call_resets_size(); // skip fldcw, if any current_offset += 5; // skip MOV instruction current_offset += 1; // skip call opcode byte return round_to(current_offset, alignment_required()) - current_offset; } // EMIT_RM() void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); cbuf.insts()->emit_int8(c); } // EMIT_CC() void emit_cc(CodeBuffer &cbuf, int f1, int f2) { unsigned char c = (unsigned char)( f1 | f2 ); cbuf.insts()->emit_int8(c); } // EMIT_OPCODE() void emit_opcode(CodeBuffer &cbuf, int code) { cbuf.insts()->emit_int8((unsigned char) code); } // EMIT_OPCODE() w/ relocation information void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { cbuf.relocate(cbuf.insts_mark() + offset, reloc); emit_opcode(cbuf, code); } // EMIT_D8() void emit_d8(CodeBuffer &cbuf, int d8) { cbuf.insts()->emit_int8((unsigned char) d8); } // EMIT_D16() void emit_d16(CodeBuffer &cbuf, int d16) { cbuf.insts()->emit_int16(d16); } // EMIT_D32() void emit_d32(CodeBuffer &cbuf, int d32) { cbuf.insts()->emit_int32(d32); } // emit 32 bit value and construct relocation entry from relocInfo::relocType void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, int format) { cbuf.relocate(cbuf.insts_mark(), reloc, format); cbuf.insts()->emit_int32(d32); } // emit 32 bit value and construct relocation entry from RelocationHolder void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder 
const& rspec, int format) { #ifdef ASSERT if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); } #endif cbuf.relocate(cbuf.insts_mark(), rspec, format); cbuf.insts()->emit_int32(d32); } // Access stack slot for load or store void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) if( -128 <= disp && disp <= 127 ) { emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte emit_d8 (cbuf, disp); // Displacement // R/M byte } else { emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte emit_d32(cbuf, disp); // Displacement // R/M byte } } // rRegI ereg, memory mem) %{ // emit_reg_mem void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { // There is no index & no scale, use form without SIB byte if ((index == 0x4) && (scale == 0) && (base != ESP_enc)) { // If no displacement, mode is 0x0; unless base is [EBP] if ( (displace == 0) && (base != EBP_enc) ) { emit_rm(cbuf, 0x0, reg_encoding, base); } else { // If 8-bit displacement, mode 0x1 if ((displace >= -128) && (displace <= 127) && (disp_reloc == relocInfo::none) ) { emit_rm(cbuf, 0x1, reg_encoding, base); emit_d8(cbuf, displace); } else { // If 32-bit displacement if (base == -1) { // Special flag for absolute address emit_rm(cbuf, 0x0, reg_encoding, 0x5); // (manual lies; no SIB needed here) if ( disp_reloc != relocInfo::none ) { emit_d32_reloc(cbuf, displace, disp_reloc, 1); } else { emit_d32 (cbuf, displace); } } else { // Normal base + offset emit_rm(cbuf, 0x2, reg_encoding, base); if ( disp_reloc != relocInfo::none ) { emit_d32_reloc(cbuf, displace, disp_reloc, 1); 
} else { emit_d32 (cbuf, displace); } } } } } else { // Else, encode with the SIB byte // If no displacement, mode is 0x0; unless base is [EBP] if (displace == 0 && (base != EBP_enc)) { // If no displacement emit_rm(cbuf, 0x0, reg_encoding, 0x4); emit_rm(cbuf, scale, index, base); } else { // If 8-bit displacement, mode 0x1 if ((displace >= -128) && (displace <= 127) && (disp_reloc == relocInfo::none) ) { emit_rm(cbuf, 0x1, reg_encoding, 0x4); emit_rm(cbuf, scale, index, base); emit_d8(cbuf, displace); } else { // If 32-bit displacement if (base == 0x04 ) { emit_rm(cbuf, 0x2, reg_encoding, 0x4); emit_rm(cbuf, scale, index, 0x04); } else { emit_rm(cbuf, 0x2, reg_encoding, 0x4); emit_rm(cbuf, scale, index, base); } if ( disp_reloc != relocInfo::none ) { emit_d32_reloc(cbuf, displace, disp_reloc, 1); } else { emit_d32 (cbuf, displace); } } } } } void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { if( dst_encoding == src_encoding ) { // reg-reg copy, use an empty encoding } else { emit_opcode( cbuf, 0x8B ); emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); } } void emit_cmpfp_fixup(MacroAssembler& _masm) { Label exit; __ jccb(Assembler::noParity, exit); __ pushf(); // // comiss/ucomiss instructions set ZF,PF,CF flags and // zero OF,AF,SF for NaN values. // Fixup flags by zeroing ZF,PF so that compare of NaN // values returns 'less than' result (CF is set). // Leave the rest of flags unchanged. 
// // 7 6 5 4 3 2 1 0 // |S|Z|r|A|r|P|r|C| (r - reserved bit) // 0 0 1 0 1 0 1 1 (0x2B) // __ andl(Address(rsp, 0), 0xffffff2b); __ popf(); __ bind(exit); } void emit_cmpfp3(MacroAssembler& _masm, Register dst) { Label done; __ movl(dst, -1); __ jcc(Assembler::parity, done); __ jcc(Assembler::below, done); __ setb(Assembler::notEqual, dst); __ movzbl(dst, dst); __ bind(done); } //============================================================================= const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; int Compile::ConstantTable::calculate_table_base_offset() const { return 0; // absolute addressing, no offset } bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ShouldNotReachHere(); } void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { // Empty encoding } uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { return 0; } #ifndef PRODUCT void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("# MachConstantBaseNode (empty encoding)"); } #endif //============================================================================= #ifndef PRODUCT void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { Compile* C = ra_->C; int framesize = C->frame_size_in_bytes(); int bangsize = C->bang_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove wordSize for return addr which is already pushed. 
framesize -= wordSize; if (C->need_stack_bang(bangsize)) { framesize -= wordSize; st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("PUSH EBP\t# Save EBP"); if (PreserveFramePointer) { st->print("\n\t"); st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); } if (framesize) { st->print("\n\t"); st->print("SUB ESP, #%d\t# Create frame",framesize); } } else { st->print("SUB ESP, #%d\t# Create frame",framesize); st->print("\n\t"); framesize -= wordSize; st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); if (PreserveFramePointer) { st->print("\n\t"); st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); if (framesize > 0) { st->print("\n\t"); st->print("ADD EBP, #%d", framesize); } } } if (VerifyStackAtCalls) { st->print("\n\t"); framesize -= wordSize; st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); } if( C->in_24_bit_fp_mode() ) { st->print("\n\t"); st->print("FLDCW \t# load 24 bit fpu control word"); } if (UseSSE >= 2 && VerifyFPU) { st->print("\n\t"); st->print("# verify FPU stack (must be clean on entry)"); } #ifdef ASSERT if (VerifyStackAtCalls) { st->print("\n\t"); st->print("# stack alignment check"); } #endif st->cr(); } #endif void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { Compile* C = ra_->C; MacroAssembler _masm(&cbuf); int framesize = C->frame_size_in_bytes(); int bangsize = C->bang_size_in_bytes(); __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); C->set_frame_complete(cbuf.insts_size()); if (C->has_mach_constant_base_node()) { // NOTE: We set the table base offset here because users might be // emitted before MachConstantBaseNode. 
Compile::ConstantTable& constant_table = C->constant_table(); constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); } } uint MachPrologNode::size(PhaseRegAlloc *ra_) const { return MachNode::size(ra_); // too many variables; just compute it the hard way } int MachPrologNode::reloc() const { return 0; // a large enough number } //============================================================================= #ifndef PRODUCT void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { Compile *C = ra_->C; int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; if (C->max_vector_size() > 16) { st->print("VZEROUPPER"); st->cr(); st->print("\t"); } if (C->in_24_bit_fp_mode()) { st->print("FLDCW standard control word"); st->cr(); st->print("\t"); } if (framesize) { st->print("ADD ESP,%d\t# Destroy frame",framesize); st->cr(); st->print("\t"); } st->print_cr("POPL EBP"); st->print("\t"); if (do_polling() && C->is_method_compilation()) { st->print("TEST PollPage,EAX\t! Poll Safepoint"); st->cr(); st->print("\t"); } } #endif void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { Compile *C = ra_->C; MacroAssembler _masm(&cbuf); if (C->max_vector_size() > 16) { // Clear upper bits of YMM registers when current compiled code uses // wide vectors to avoid AVX <-> SSE transition penalty during call. 
_masm.vzeroupper(); } // If method set FPU control word, restore to standard control word if (C->in_24_bit_fp_mode()) { _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here if (framesize >= 128) { emit_opcode(cbuf, 0x81); // add SP, #framesize emit_rm(cbuf, 0x3, 0x00, ESP_enc); emit_d32(cbuf, framesize); } else if (framesize) { emit_opcode(cbuf, 0x83); // add SP, #framesize emit_rm(cbuf, 0x3, 0x00, ESP_enc); emit_d8(cbuf, framesize); } emit_opcode(cbuf, 0x58 | EBP_enc); if (StackReservedPages > 0 && C->has_reserved_stack_access()) { __ reserved_stack_check(); } if (do_polling() && C->is_method_compilation()) { cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); emit_opcode(cbuf,0x85); emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX emit_d32(cbuf, (intptr_t)os::get_polling_page()); } } uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { Compile *C = ra_->C; // If method set FPU control word, restore to standard control word int size = C->in_24_bit_fp_mode() ? 6 : 0; if (C->max_vector_size() > 16) size += 3; // vzeroupper if (do_polling() && C->is_method_compilation()) size += 6; int framesize = C->frame_size_in_bytes(); assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); // Remove two words for return addr and rbp, framesize -= 2*wordSize; size++; // popl rbp, if (framesize >= 128) { size += 6; } else { size += framesize ? 
3 : 0; } size += 64; // added to support ReservedStackAccess return size; } int MachEpilogNode::reloc() const { return 0; // a large enough number } const Pipeline * MachEpilogNode::pipeline() const { return MachNode::pipeline_class(); } int MachEpilogNode::safepoint_offset() const { return 0; } //============================================================================= enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; static enum RC rc_class( OptoReg::Name reg ) { if( !OptoReg::is_valid(reg) ) return rc_bad; if (OptoReg::is_stack(reg)) return rc_stack; VMReg r = OptoReg::as_VMReg(reg); if (r->is_Register()) return rc_int; if (r->is_FloatRegister()) { assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); return rc_float; } assert(r->is_XMMRegister(), "must be"); return rc_xmm; } static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size, outputStream* st ) { if( cbuf ) { emit_opcode (*cbuf, opcode ); encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); if( opcode == 0x8B || opcode == 0x89 ) { // MOV if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); } else { // FLD, FST, PUSH, POP st->print("%s [ESP + #%d]",op_str,offset); } #endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); return size+3+offset_size; } // Helper for XMM registers. Extra opcode bits, limited syntax. 
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { int in_size_in_bits = Assembler::EVEX_32bit; int evex_encoding = 0; if (reg_lo+1 == reg_hi) { in_size_in_bits = Assembler::EVEX_64bit; evex_encoding = Assembler::VEX_W; } if (cbuf) { MacroAssembler _masm(cbuf); if (reg_lo+1 == reg_hi) { // double move? if (is_load) { __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); } else { __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); } } else { if (is_load) { __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); } else { __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); } } #ifndef PRODUCT } else if (!do_size) { if (size != 0) st->print("\n\t"); if (reg_lo+1 == reg_hi) { // double move? if (is_load) st->print("%s %s,[ESP + #%d]", UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", Matcher::regName[reg_lo], offset); else st->print("MOVSD [ESP + #%d],%s", offset, Matcher::regName[reg_lo]); } else { if (is_load) st->print("MOVSS %s,[ESP + #%d]", Matcher::regName[reg_lo], offset); else st->print("MOVSS [ESP + #%d],%s", offset, Matcher::regName[reg_lo]); } #endif } bool is_single_byte = false; if ((UseAVX > 2) && (offset != 0)) { is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); } int offset_size = 0; if (UseAVX > 2 ) { offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); } else { offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); } size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
return size+5+offset_size; } static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { if (cbuf) { MacroAssembler _masm(cbuf); if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } else { __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } #ifndef PRODUCT } else if (!do_size) { if (size != 0) st->print("\n\t"); if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } } else { if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } } #endif } // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. int sz = (UseAVX > 2) ? 6 : 4; if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; return size + sz; } static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { MacroAssembler _masm(cbuf); __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); #endif } return (UseAVX> 2) ? 
6 : 4; } static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { MacroAssembler _masm(cbuf); __ movdl(as_Register(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); #endif } return (UseAVX> 2) ? 6 : 4; } static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { if( cbuf ) { emit_opcode(*cbuf, 0x8B ); emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); #endif } return size+2; } static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, int offset, int size, outputStream* st ) { if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there if( cbuf ) { emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("FLD %s",Matcher::regName[src_lo]); #endif } size += 2; } int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; const char *op_str; int op; if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; op = 0xDD; } else { // 32-bit store op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; op = 0xD9; assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); } return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); } // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st); static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int stack_offset, int reg, uint ireg, outputStream* st); static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, int dst_offset, uint ireg, outputStream* st) { int calc_size = 0; int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); switch (ireg) { case Op_VecS: calc_size = 3+src_offset_size + 3+dst_offset_size; break; case Op_VecD: { calc_size = 3+src_offset_size + 3+dst_offset_size; int tmp_src_offset = src_offset + 4; int tmp_dst_offset = dst_offset + 4; src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4); calc_size += 3+src_offset_size + 3+dst_offset_size; break; } case Op_VecX: case Op_VecY: case Op_VecZ: calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; break; default: ShouldNotReachHere(); } if (cbuf) { MacroAssembler _masm(cbuf); int offset = __ offset(); switch (ireg) { case Op_VecS: __ pushl(Address(rsp, src_offset)); __ popl (Address(rsp, dst_offset)); break; case Op_VecD: __ pushl(Address(rsp, src_offset)); __ popl (Address(rsp, dst_offset)); __ pushl(Address(rsp, src_offset+4)); __ popl (Address(rsp, dst_offset+4)); break; case Op_VecX: __ movdqu(Address(rsp, -16), xmm0); __ movdqu(xmm0, Address(rsp, src_offset)); __ movdqu(Address(rsp, dst_offset), xmm0); __ movdqu(xmm0, Address(rsp, -16)); break; case Op_VecY: __ vmovdqu(Address(rsp, -32), xmm0); __ vmovdqu(xmm0, Address(rsp, src_offset)); __ vmovdqu(Address(rsp, dst_offset), xmm0); __ vmovdqu(xmm0, Address(rsp, -32)); break; case Op_VecZ: __ evmovdqul(Address(rsp, -64), xmm0, 2); __ evmovdqul(xmm0, Address(rsp, src_offset), 2); __ 
evmovdqul(Address(rsp, dst_offset), xmm0, 2); __ evmovdqul(xmm0, Address(rsp, -64), 2); break; default: ShouldNotReachHere(); } int size = __ offset() - offset; assert(size == calc_size, "incorrect size calculation"); return size; #ifndef PRODUCT } else if (!do_size) { switch (ireg) { case Op_VecS: st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" "popl [rsp + #%d]", src_offset, dst_offset); break; case Op_VecD: st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" "popq [rsp + #%d]\n\t" "pushl [rsp + #%d]\n\t" "popq [rsp + #%d]", src_offset, dst_offset, src_offset+4, dst_offset+4); break; case Op_VecX: st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" "movdqu xmm0, [rsp + #%d]\n\t" "movdqu [rsp + #%d], xmm0\n\t" "movdqu xmm0, [rsp - #16]", src_offset, dst_offset); break; case Op_VecY: st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" "vmovdqu xmm0, [rsp + #%d]\n\t" "vmovdqu [rsp + #%d], xmm0\n\t" "vmovdqu xmm0, [rsp - #32]", src_offset, dst_offset); break; case Op_VecZ: st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" "vmovdqu xmm0, [rsp + #%d]\n\t" "vmovdqu [rsp + #%d], xmm0\n\t" "vmovdqu xmm0, [rsp - #64]", src_offset, dst_offset); break; default: ShouldNotReachHere(); } #endif } return calc_size; } uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); OptoReg::Name src_first = ra_->get_reg_first(in(1)); OptoReg::Name dst_second = ra_->get_reg_second(this ); OptoReg::Name dst_first = ra_->get_reg_first(this ); enum RC src_second_rc = rc_class(src_second); enum RC src_first_rc = rc_class(src_first); enum RC dst_second_rc = rc_class(dst_second); enum RC dst_first_rc = rc_class(dst_first); assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); // Generate spill code! 
int size = 0; if( src_first == dst_first && src_second == dst_second ) return size; // Self copy, no move if (bottom_type()->isa_vect() != NULL) { uint ireg = ideal_reg(); assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { // mem -> mem int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { int stack_offset = ra_->reg2offset(dst_first); return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { int stack_offset = ra_->reg2offset(src_first); return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); } else { ShouldNotReachHere(); } } // -------------------------------------- // Check for mem-mem move. push/pop to move. 
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { if( src_second == dst_first ) { // overlapping stack copy ranges assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits } // move low bits size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); } return size; } // -------------------------------------- // Check for integer reg-reg copy if( src_first_rc == rc_int && dst_first_rc == rc_int ) size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); // Check for integer store if( src_first_rc == rc_int && dst_first_rc == rc_stack ) size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); // Check for integer load if( dst_first_rc == rc_int && src_first_rc == rc_stack ) size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); // Check for integer reg-xmm reg copy if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), "no 64 bit integer-float reg moves" ); return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // -------------------------------------- // Check for float reg-reg copy if( src_first_rc == rc_float && dst_first_rc == rc_float ) 
{ assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); if( cbuf ) { // Note the mucking with the register encode to compensate for the 0/1 // indexing issue mentioned in a comment in the reg_def sections // for FPR registers many lines above here. if( src_first != FPR1L_num ) { emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); } else { emit_opcode (*cbuf, 0xDD ); // FST ST(i) emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); } #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); else st->print( "FST %s", Matcher::regName[dst_first]); #endif } return size + ((src_first != FPR1L_num) ? 2+2 : 2); } // Check for float store if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); } // Check for float load if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { int offset = ra_->reg2offset(src_first); const char *op_str; int op; if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
op_str = "FLD_D"; op = 0xDD; } else { // 32-bit load op_str = "FLD_S"; op = 0xD9; assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); } if( cbuf ) { emit_opcode (*cbuf, op ); encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); #endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); return size + 3+offset_size+2; } // Check for xmm reg-reg copy if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // Check for xmm reg-integer reg copy if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), "no 64 bit float-integer reg moves" ); return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); } // Check for xmm store if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); } // Check for float xmm load if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); } // Copy from float reg to xmm reg if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { // copy to the top of stack from floating point reg // and use LEA to preserve flags if( cbuf ) { emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] emit_rm(*cbuf, 0x1, ESP_enc, 0x04); emit_rm(*cbuf, 0x0, 0x04, ESP_enc); emit_d8(*cbuf,0xF8); #ifndef PRODUCT 
} else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("LEA ESP,[ESP-8]"); #endif } size += 4; size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); // Copy from the temp memory to the xmm reg. size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); if( cbuf ) { emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] emit_rm(*cbuf, 0x1, ESP_enc, 0x04); emit_rm(*cbuf, 0x0, 0x04, ESP_enc); emit_d8(*cbuf,0x08); #ifndef PRODUCT } else if( !do_size ) { if( size != 0 ) st->print("\n\t"); st->print("LEA ESP,[ESP+8]"); #endif } size += 4; return size; } assert( size > 0, "missed a case" ); // -------------------------------------------------------------------- // Check for second bits still needing moving. if( src_second == dst_second ) return size; // Self copy; no move assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); // Check for second word int-int move if( src_second_rc == rc_int && dst_second_rc == rc_int ) return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); // Check for second word integer store if( src_second_rc == rc_int && dst_second_rc == rc_stack ) return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); // Check for second word integer load if( dst_second_rc == rc_int && src_second_rc == rc_stack ) return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); Unimplemented(); return 0; // Mute compiler } #ifndef PRODUCT void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { implementation( NULL, ra_, false, st ); } #endif void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { implementation( &cbuf, ra_, false, NULL ); } uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { return implementation( NULL, ra_, true, NULL ); } //============================================================================= 
#ifndef PRODUCT void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); int reg = ra_->get_reg_first(this); st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); } #endif void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); int reg = ra_->get_encode(this); if( offset >= 128 ) { emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] emit_rm(cbuf, 0x2, reg, 0x04); emit_rm(cbuf, 0x0, 0x04, ESP_enc); emit_d32(cbuf, offset); } else { emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] emit_rm(cbuf, 0x1, reg, 0x04); emit_rm(cbuf, 0x0, 0x04, ESP_enc); emit_d8(cbuf, offset); } } uint BoxLockNode::size(PhaseRegAlloc *ra_) const { int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); if( offset >= 128 ) { return 7; } else { return 4; } } //============================================================================= #ifndef PRODUCT void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); st->print_cr("\tNOP"); st->print_cr("\tNOP"); if( !OptoBreakpoint ) st->print_cr("\tNOP"); } #endif void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler masm(&cbuf); #ifdef ASSERT uint insts_size = cbuf.insts_size(); #endif masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub())); /* WARNING these NOPs are critical so that verified entry point is properly aligned for patching by NativeJump::patch_verified_entry() */ int nops_cnt = 2; if( !OptoBreakpoint ) // Leave space for int3 nops_cnt += 1; masm.nop(nops_cnt); assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); } uint MachUEPNode::size(PhaseRegAlloc *ra_) const { return 
OptoBreakpoint ? 11 : 12; } //============================================================================= int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } // This is UltraSparc specific, true just means we have fast l2f conversion const bool Matcher::convL2FSupported(void) { return true; } // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then // this method should return false for offset 0. bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { // The passed offset is relative to address of the branch. // On 86 a branch displacement is calculated relative to address // of a next instruction. offset -= br_size; // the short version of jmpConUCF2 contains multiple branches, // making the reach slightly less if (rule == jmpConUCF2_rule) return (-126 <= offset && offset <= 125); return (-128 <= offset && offset <= 127); } const bool Matcher::isSimpleConstant64(jlong value) { // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. return false; } // The ecx parameter to rep stos for the ClearArray node is in dwords. const bool Matcher::init_array_count_is_in_bytes = false; // Threshold size for cleararray. const int Matcher::init_array_short_size = 8 * BytesPerLong; // Needs 2 CMOV's for longs. const int Matcher::long_cmove_cost() { return 1; } // No CMOVF/CMOVD with SSE/SSE2 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } // Does the CPU require late expand (see block.cpp for description of late expand)? const bool Matcher::require_postalloc_expand = false; // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? 
True for Intel but false for most RISCs const bool Matcher::clone_shift_expressions = true; // Do we need to mask the count passed to shift instructions or does // the cpu only look at the lower 5/6 bits anyway? const bool Matcher::need_masked_shift_count = false; bool Matcher::narrow_oop_use_complex_address() { ShouldNotCallThis(); return true; } bool Matcher::narrow_klass_use_complex_address() { ShouldNotCallThis(); return true; } // Is it better to copy float constants, or load them directly from memory? // Intel can load a float constant from a direct address, requiring no // extra registers. Most RISCs will have to materialize an address into a // register first, so they would do better to copy the constant from stack. const bool Matcher::rematerialize_float_constants = true; // If CPU can load and store mis-aligned doubles directly then no fixup is // needed. Else we split the double into 2 integer pieces and move it // piece-by-piece. Only happens when passing doubles into C code as the // Java calling convention forces doubles to be aligned. const bool Matcher::misaligned_doubles_ok = true; void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { // Get the memory operand from the node uint numopnds = node->num_opnds(); // Virtual call for number of operands uint skipped = node->oper_input_base(); // Sum of leaves skipped so far assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); uint opcnt = 1; // First operand uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand while( idx >= skipped+num_edges ) { skipped += num_edges; opcnt++; // Bump operand count assert( opcnt < numopnds, "Accessing non-existent operand" ); num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand } MachOper *memory = node->_opnds[opcnt]; MachOper *new_memory = NULL; switch (memory->opcode()) { case DIRECT: case INDOFFSET32X: // No transformation necessary. 
return; case INDIRECT: new_memory = new indirect_win95_safeOper( ); break; case INDOFFSET8: new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDOFFSET32: new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDINDEXOFFSET: new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); break; case INDINDEXSCALE: new_memory = new indIndexScale_win95_safeOper(memory->scale()); break; case INDINDEXSCALEOFFSET: new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); break; case LOAD_LONG_INDIRECT: case LOAD_LONG_INDOFFSET32: // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} return; default: assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); return; } node->_opnds[opcnt] = new_memory; } // Advertise here if the CPU requires explicit rounding operations // to implement the UseStrictFP mode. const bool Matcher::strict_fp_requires_explicit_rounding = true; // Are floats conerted to double when stored to stack during deoptimization? // On x32 it is stored with convertion only when FPU is used for floats. bool Matcher::float_in_double() { return (UseSSE == 0); } // Do ints take an entire long register or just half? const bool Matcher::int_in_long = false; // Return whether or not this register is ever used as an argument. This // function is used on startup to build the trampoline stubs in generateOptoStub. // Registers not mentioned will be killed by the VM call in the trampoline, and // arguments in those registers not be available to the callee. 
bool Matcher::can_be_java_arg( int reg ) { if( reg == ECX_num || reg == EDX_num ) return true; if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; return false; } bool Matcher::is_spillable_arg( int reg ) { return can_be_java_arg(reg); } bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { // Use hardware integer DIV instruction when // it is faster than a code which use multiply. // Only when constant divisor fits into 32 bit // (min_jint is excluded to get only correct // positive 32 bit values from negative). return VM_Version::has_fast_idiv() && (divisor == (int)divisor && divisor != min_jint); } // Register for DIVI projection of divmodI RegMask Matcher::divI_proj_mask() { return EAX_REG_mask(); } // Register for MODI projection of divmodI RegMask Matcher::modI_proj_mask() { return EDX_REG_mask(); } // Register for DIVL projection of divmodL RegMask Matcher::divL_proj_mask() { ShouldNotReachHere(); return RegMask(); } // Register for MODL projection of divmodL RegMask Matcher::modL_proj_mask() { ShouldNotReachHere(); return RegMask(); } const RegMask Matcher::method_handle_invoke_SP_save_mask() { return NO_REG_mask(); } // Returns true if the high 32 bits of the value is known to be zero. bool is_operand_hi32_zero(Node* n) { int opc = n->Opcode(); if (opc == Op_AndL) { Node* o2 = n->in(2); if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { return true; } } if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { return true; } return false; } %} //----------ENCODING BLOCK----------------------------------------------------- // This block specifies the encoding classes used by the compiler to output // byte streams. Encoding classes generate functions which are called by // Machine Instruction Nodes in order to generate the bit encoding of the // instruction. Operands specify their base encoding interface with the // interface keyword. 
There are currently supported four interfaces, // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an // operand to generate a function which returns its register number when // queried. CONST_INTER causes an operand to generate a function which // returns the value of the constant when queried. MEMORY_INTER causes an // operand to generate four functions which return the Base Register, the // Index Register, the Scale Value, and the Offset Value of the operand when // queried. COND_INTER causes an operand to generate six functions which // return the encoding code (ie - encoding bits for the instruction) // associated with each basic boolean condition for a conditional instruction. // Instructions specify two basic values for encoding. They use the // ins_encode keyword to specify their encoding class (which must be one of // the class names specified in the encoding block), and they use the // opcode keyword to specify, in order, their primary, secondary, and // tertiary opcode. Only the opcode sections which a particular instruction // needs for encoding need to be specified. encode %{ // Build emit functions for each basic byte or larger field in the intel // encoding scheme (opcode, rm, sib, immediate), and call them from C++ // code in the enc_class source block. Emit functions will live in the // main source block for now. 
In future, we can generalize this by // adding a syntax that specifies the sizes of fields in an order, // so that the adlc can build the emit functions automagically // Emit primary opcode enc_class OpcP %{ emit_opcode(cbuf, $primary); %} // Emit secondary opcode enc_class OpcS %{ emit_opcode(cbuf, $secondary); %} // Emit opcode directly enc_class Opcode(immI d8) %{ emit_opcode(cbuf, $d8$$constant); %} enc_class SizePrefix %{ emit_opcode(cbuf,0x66); %} enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) emit_opcode(cbuf,$opcode$$constant); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} enc_class mov_r32_imm0( rRegI dst ) %{ emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 %} enc_class cdq_enc %{ // Full implementation of Java idiv and irem; checks for // special case as described in JVM spec., p.243 & p.271. 
// // normal case special case // // input : rax,: dividend min_int // reg: divisor -1 // // output: rax,: quotient (= rax, idiv reg) min_int // rdx: remainder (= rax, irem reg) 0 // // Code sequnce: // // 81 F8 00 00 00 80 cmp rax,80000000h // 0F 85 0B 00 00 00 jne normal_case // 33 D2 xor rdx,edx // 83 F9 FF cmp rcx,0FFh // 0F 84 03 00 00 00 je done // normal_case: // 99 cdq // F7 F9 idiv rax,ecx // done: // emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done // normal_case: emit_opcode(cbuf,0x99); // cdq // idiv (note: must be emitted by the user of this rule) // normal: %} // Dense encoding for older common ops enc_class Opc_plus(immI opcode, rRegI reg) %{ emit_opcode(cbuf, $opcode$$constant + $reg$$reg); %} // Opcde enc_class for 8/32 bit immediate instructions with sign-extension enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { emit_opcode(cbuf, $primary | 0x02); } else { // If 32-bit immediate emit_opcode(cbuf, $primary); } %} enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { emit_opcode(cbuf, $primary | 0x02); } else { // If 32-bit immediate emit_opcode(cbuf, $primary); } // Emit r/m byte with secondary opcode, 
after primary opcode. emit_rm(cbuf, 0x3, $secondary, $dst$$reg); %} enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { $$$emit8$imm$$constant; } else { // If 32-bit immediate // Output immediate $$$emit32$imm$$constant; } %} enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode int con = (int)$imm$$constant; // Throw away top bits emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); // Emit r/m byte with secondary opcode, after primary opcode. emit_rm(cbuf, 0x3, $secondary, $dst$$reg); if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); else emit_d32(cbuf,con); %} enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode int con = (int)($imm$$constant >> 32); // Throw away bottom bits emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); // Emit r/m byte with tertiary opcode, after primary opcode. emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); else emit_d32(cbuf,con); %} enc_class OpcSReg (rRegI dst) %{ // BSWAP emit_cc(cbuf, $secondary, $dst$$reg ); %} enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP int destlo = $dst$$reg; int desthi = HIGH_FROM_LOW(destlo); // bswap lo emit_opcode(cbuf, 0x0F); emit_cc(cbuf, 0xC8, destlo); // bswap hi emit_opcode(cbuf, 0x0F); emit_cc(cbuf, 0xC8, desthi); // xchg lo and hi emit_opcode(cbuf, 0x87); emit_rm(cbuf, 0x3, destlo, desthi); %} enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 
emit_rm(cbuf, 0x3, $secondary, $div$$reg ); %} enc_class enc_cmov(cmpOp cop ) %{ // CMOV $$$emit8$primary; emit_cc(cbuf, $secondary, $cop$$cmpcode); %} enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); emit_d8(cbuf, op >> 8 ); emit_d8(cbuf, op & 255); %} // emulate a CMOV with a conditional branch around a MOV enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV // Invert sense of branch from sense of CMOV emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); emit_d8( cbuf, $brOffs$$constant ); %} enc_class enc_PartialSubtypeCheck( ) %{ Register Redi = as_Register(EDI_enc); // result register Register Reax = as_Register(EAX_enc); // super class Register Recx = as_Register(ECX_enc); // killed Register Resi = as_Register(ESI_enc); // sub class Label miss; MacroAssembler _masm(&cbuf); __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, NULL, &miss, /*set_cond_codes:*/ true); if ($primary) { __ xorptr(Redi, Redi); } __ bind(miss); %} enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All MacroAssembler masm(&cbuf); int start = masm.offset(); if (UseSSE >= 2) { if (VerifyFPU) { masm.verify_FPU(0, "must be empty in SSE2+ mode"); } } else { // External c_calling_convention expects the FPU stack to be 'clean'. // Compiled code leaves it dirty. Do cleanup now. masm.empty_FPU_stack(); } if (sizeof_FFree_Float_Stack_All == -1) { sizeof_FFree_Float_Stack_All = masm.offset() - start; } else { assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); } %} enc_class Verify_FPU_For_Leaf %{ if( VerifyFPU ) { MacroAssembler masm(&cbuf); masm.verify_FPU( -3, "Returning from Runtime Leaf call"); } %} enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf // This is the instruction starting address for relocation info. 
cbuf.set_insts_mark(); $$$emit8$primary; // CALL directly to the runtime emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), runtime_call_Relocation::spec(), RELOC_IMM32 ); if (UseSSE >= 2) { MacroAssembler _masm(&cbuf); BasicType rt = tf()->return_type(); if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { // A C runtime call where the return value is unused. In SSE2+ // mode the result needs to be removed from the FPU stack. It's // likely that this function call could be removed by the // optimizer if the C function is a pure function. __ ffree(0); } else if (rt == T_FLOAT) { __ lea(rsp, Address(rsp, -4)); __ fstp_s(Address(rsp, 0)); __ movflt(xmm0, Address(rsp, 0)); __ lea(rsp, Address(rsp, 4)); } else if (rt == T_DOUBLE) { __ lea(rsp, Address(rsp, -8)); __ fstp_d(Address(rsp, 0)); __ movdbl(xmm0, Address(rsp, 0)); __ lea(rsp, Address(rsp, 8)); } } %} enc_class pre_call_resets %{ // If method sets FPU control word restore it here debug_only(int off0 = cbuf.insts_size()); if (ra_->C->in_24_bit_fp_mode()) { MacroAssembler _masm(&cbuf); __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); } if (ra_->C->max_vector_size() > 16) { // Clear upper bits of YMM registers when current compiled code uses // wide vectors to avoid AVX <-> SSE transition penalty during call. MacroAssembler _masm(&cbuf); __ vzeroupper(); } debug_only(int off1 = cbuf.insts_size()); assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); %} enc_class post_call_FPU %{ // If method sets FPU control word do it here also if (Compile::current()->in_24_bit_fp_mode()) { MacroAssembler masm(&cbuf); masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); } %} enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine // who we intended to call. 
cbuf.set_insts_mark(); $$$emit8$primary; if (!_method) { emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), runtime_call_Relocation::spec(), RELOC_IMM32); } else { int method_index = resolved_method_index(cbuf); RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) : static_call_Relocation::spec(method_index); emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), rspec, RELOC_DISP32); // Emit stubs for static call. address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { ciEnv::current()->record_failure("CodeCache is full"); return; } } %} enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL MacroAssembler _masm(&cbuf); __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); %} enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL int disp = in_bytes(Method::from_compiled_offset()); assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] cbuf.set_insts_mark(); $$$emit8$primary; emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte emit_d8(cbuf, disp); // Displacement %} // Following encoding is no longer used, but may be restored if calling // convention changes significantly. 
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_oop_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

// Primary opcode byte, register-direct ModRM with the secondary opcode in
// the reg field, then the shift count as an 8-bit immediate.
enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

// Opcode 0xB8+reg followed by the full 32-bit immediate.
enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Same shape as LdImmI, but the base opcode comes from the instruction's
// primary opcode instead of being hard-coded.
enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Loads the low 32 bits of a long constant into the low register of the
// pair; a zero half is materialized with XOR reg,reg instead.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Loads the upper 32 bits of a long constant into the register whose
// encoding is two above the low register; a zero half uses XOR reg,reg.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  // NOTE(review): the '+ 2' is written out by hand here while the sibling
  // encodings use HIGH_FROM_LOW() for the high half - presumably the same
  // thing; confirm against the macro definition.
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

// Copy from the low register of a long pair into an int register.
enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

// Register-direct ModRM byte only; no opcode is emitted here.
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Primary opcode plus register-direct ModRM on the low halves of two pairs.
enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Secondary opcode plus register-direct ModRM on the high halves of two pairs.
enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// ModRM only, low halves.
enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// ModRM only, high halves.
enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// ModRM only: int register in the reg field, high half of the long in r/m.
enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

// Emits the constant, taken as a jfloat and converted with jint_cast, as 32 bits.
enc_class Con32FPR_as_bits(immFPR src) %{    // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

// Same as Con32FPR_as_bits, for the immF operand class.
enc_class Con32F_as_bits(immF src) %{    // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

// Emits the constant as a raw 32-bit value via emit_d32.
enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

// ModRM with mod=00, r/m=0x05 (absolute disp32 form) and a zero displacement.
enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

// Emits the 0xF0 prefix byte only when os::is_MP() reports true.
enc_class lock_prefix( ) %{
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// 32-bit compare-and-exchange on [mem_ptr], with a lock prefix on MP systems.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Turns the current flags into 0/1 in 'res': res is preloaded with 0 and the
// 5-byte 'MOV res,1' is skipped when the NE condition holds.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

// Register + memory operand; everything after the opcode is produced by
// encode_RegMem, including the displacement relocation taken from 'mem'.
enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// High-half variant of RegMem: high register of the pair and displacement+4.
// Relocated displacements are rejected by the assert.
enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by a constant in 1..31: a double-precision shift (0F tertiary)
// across the pair followed by a plain shift of one half.  The tertiary
// opcode value 0xA4 decides which half plays which role.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Long shift by a constant in 32..63: move the high half into the low half,
// shift the low half by (cnt-32) when that is non-zero, then shift the high
// half by 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Long shift by a constant in 32..63 where the vacated half is cleared.
// The secondary opcode value 0x5 decides which half receives the data (r1)
// and which half is zeroed (r2).
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!!
// Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

// Like RMopc_Mem_no_oop, but the displacement relocation reported by the
// memory operand is passed through to encode_RegMem.
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Address form [src0 + src1] with no index and no scale, encoded through
// encode_RegMem with 'dst' in the reg field.
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// dst = min(dst, src): the 2-byte MOV is skipped when dst < src already.
enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// dst = max(dst, src): the 2-byte MOV is skipped when dst > src already.
enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Stores an FPU register to memory using the instruction's primary opcode;
// the /2 (store) or /3 (store & pop) form is selected below.
enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: after 'SUB p,q', SBB leaves tmp all-ones when
// the subtraction borrowed and zero otherwise, so y is added back into p
// only in the borrow case.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long left shift with the count in ECX.  The short jump skips the
// two 2-byte instructions (4 bytes) that handle bit 5 of the count.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV    $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR    $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD   $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL    $dst.lo,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable long logical right shift with the count in ECX; mirror image of
// shift_left_long (4 bytes skipped when bit 5 of the count is clear).
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV    $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR    $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD   $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR    $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable long arithmetic right shift with the count in ECX.  The skipped
// block is 5 bytes here (2-byte MOV + 3-byte SAR imm8), hence the 0x05.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV    $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR    $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD   $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR    $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!!
// equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Pushes a copy of an FPU register onto the FPU stack.
enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1()
// and multiplies it into 'dst', popping the loaded value.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Same sequence as strictfp_bias1, using addr_fpu_subnormal_bias2().
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{    // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{    // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{    // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{    // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

// Same byte sequence as Pop_Reg_DPR, for the regFPR operand class.
enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Same byte sequence as Push_Reg_DPR, for the regFPR operand class.
enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
// When src is FPR1 the value is stored without popping (/2); otherwise it
// is first pushed with FLD and then stored with pop (/3).
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST[P]_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
// Same scheme as Pop_Mem_Reg_FPR, with the 0xDD (double) store opcode.
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST[P]_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
// When src is FPR1 no FLD is emitted and a non-popping FST is used with the
// register index reduced by one; otherwise FLD then FSTP.
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST[P] ST(i)
%}

// Pushes dst, then - unless src already is FPR1 - exchanges src with FPR1
// underneath the pushed value (FINCSTP / FXCH / FDECSTP).
enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Reserves 8 bytes of stack and pushes src1, then src0, onto the FPU stack
// through that slot.  The 8 bytes stay allocated when this encoding ends.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding, using a 4-byte stack slot.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pops the FPU top into dst through [rsp], then releases 8 bytes of stack.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Float variant of Push_ResultD; the number of stack bytes released is
// supplied by the d8 operand.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Reserves 8 bytes of stack and pushes src onto the FPU stack through it.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// rsp -= 8
enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

// rsp += 8
enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Pushes src onto the FPU stack through [rsp]; no stack space is reserved
// here, the slot at [rsp] is simply overwritten.
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Same FINCSTP / FXCH / FDECSTP exchange as in Push_Reg_Mod_DPR, emitted
// only when src is not FPR1.
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copies the FPU status word into the CPU flags and emits a JNP that skips
// the next 5 bytes of code.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

// FPREM loop.  The near JP carries the displacement 0xFFFFFFF4 (-12), which
// is the size of the whole sequence, so it branches back to the FPREM.
enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Moves the FPU compare result into the CPU flags; when bit 0x0400 of the
// status word is set, AH is forced to 1 before the SAHF.
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

// The JNP skips 3 bytes (MOV AH,1 + SAHF) and lands on the trailing NOP.
enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

//     fnstsw_ax();
//     sahf();
//     movl(dst, nan_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//
// Values emitted by the encoding below:
// less_result     = -1;
// greater_result  =  1;
// equal_result    =  0;
// nan_result      = -1;

// Each 'MOV dst,imm32' is 5 bytes and each short jump is 2 bytes, which is
// where the displacements 0x13, 0x0C and 0x05 to the common exit come from.
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8    ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extends an int into a long pair: both halves receive src, then the
// high half is arithmetically shifted right by 31.
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding  , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Pushes the long onto the CPU stack, loads it onto the FPU stack with
// FILD, then releases the 8 bytes again.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// One-operand IMUL by src1, then an arithmetic right shift of 'dst' by
// (cnt-32); the shift is omitted when cnt is exactly 32.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL   EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR    EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL   EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL   EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD    EDX,$tmp
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// Pushes both longs (four 32-bit words), calls SharedRuntime::ldiv and then
// releases the 16 bytes of arguments.
// NOTE(review): HIGH_FROM_LOW is applied to the whole PUSH opcode byte
// (0x50+reg) here, whereas convert_long_double writes
// 0x50+HIGH_FROM_LOW(reg); the two agree only if the macro is a plain
// additive offset - confirm against its definition.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Same sequence as long_div, calling SharedRuntime::lrem instead.
// NOTE(review): same HIGH_FROM_LOW-on-opcode pattern as in long_div.
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Sets flags from (src.lo | src.hi), computed in tmp.
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Compares the low halves and, only when they are equal, the high halves.
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// CMP on the low halves followed by SBB on the high halves (in tmp), so the
// final flags reflect the full 64-bit subtraction.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB   $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Same CMP/SBB scheme with a zeroed tmp as the left operand, i.e. the flags
// of (0 - src).
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(cbuf,0x33);  // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff...
// smells like Gnu Superoptimizer
  // Negates the 64-bit value held in the $dst register pair in place:
  // NEG hi, NEG lo, then SBB hi,0 to take the borrow of the low-half NEG
  // out of the high half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // Emits the single-byte POP EDX instruction.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);    // POP EDX
  %}

  // Emits a relocated JMP rel32 to OptoRuntime::rethrow_stub().
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    // The displacement is computed from the end of the 4-byte field that is
    // about to be emitted, hence the extra -4 after insts_end().
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the value tested for as the "invalid" conversion result
    // (see the comment at the top of this encoding); only then is the slow
    // path below executed.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call (2-byte FLD + 5-byte CALL)
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long.  Same scheme as DPR2I_encoding, but the
  // result occupies two stack words and is popped into EDX:EAX, and the slow
  // path calls StubRoutines::d2l_wrapper().
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the long result
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long (low word into EAX, high word into EDX);
    // adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    // The slow path is taken only when EDX == 0x80000000 and EAX == 0.
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST/JNE pair below
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM byte: EAX,EAX
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */  (non-popping form)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Emits FSUB ST,$src1 immediately followed by an FDIV on $src2 (D8 F0+i).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Emits FADD ST,$src1 followed by FMUL ST,$src2 (result stays in ST).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // Like MulFAddF, but the multiply is the popping form FMULP src2,ST.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // Emits a DF /5 instruction on $mem followed by a DF /7 store into the
  // stack slot $dst, so the 64-bit value travels through the FPU stack top.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.
// Unfortunately, it kills the condition code
  // in the process
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    // Record a poll_type relocation at the current instruction mark.
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);            // TEST r/m32,r32
    emit_rm (cbuf, 0x0, 0x7, 0x5);     // ModRM: absolute disp32 operand, register field 7 (EDI)
    emit_d32(cbuf, (intptr_t)os::get_polling_page());  // address of the polling page
  %}
%}
// NOTE(review): the '%}' above closes a block opened before this section
// (presumably the 'encode %{' block) -- confirm against the full file.


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // The lo/hi tables are indexed by ideal_reg and give the low and high
  // halves of the returned OptoRegPair.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Same tables as c_return_value.  Note that here a float result is placed
  // in XMM0 as soon as UseSSE>=1, whereas c_return_value requires UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate in the signed 8-bit range [-128, 127]
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate in the signed 16-bit range [-32768, 32767]
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31() %{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate in the range [1, 31]
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate in the range [32, 63]
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate equal to 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate equal to 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate equal to 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value that survives a round trip through (int), i.e. is
// representable as a sign-extended 32-bit integer
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (only when UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (only when UseSSE>=2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  // NOTE(review): the predicate goes through jlong_cast() rather than a
  // floating-point compare; presumably this tests the raw bit pattern so
  // that only +0.0 matches -- confirm against jlong_cast's definition.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (only when UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (only when UseSSE >= 1)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Shift-count constants 16 and 24
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Each operand below is constrained to a single-register class and prints
// as the register name given in its format string.
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register allocated from the nax_reg class
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the nadx_reg class
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register allocated from the ncx_reg class
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the int_reg class
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nax_reg class
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the nabx_reg class
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register allocated from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register operand, allocated from the long_reg class
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long operand pinned to the eadx_reg class (printed as "EDX:EAX")
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long operand pinned to the ebcx_reg class (printed as "EBX:ECX")
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags operand whose predicate is the constant false
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Double register operands (FPR register classes, only when UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float
// register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Each memory operand describes an address as base/index/scale/disp.
// The operands without an index register all pass index(0x4).

// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (here the offset is a pointer immediate and the register is an int register)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.
// If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
// Address register for load-long: constrained to the esi_reg class.
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{  %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Operand class grouping the two load-long addressing forms defined above.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot operand for sRegI; same ESP-relative addressing as stackSlotP.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot operand for sRegF; same ESP-relative addressing as stackSlotP.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot operand for sRegD; same ESP-relative addressing as stackSlotP.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot operand for sRegL; same ESP-relative addressing as stackSlotP.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These operands take their base register as eRegP_no_EBP and all carry
// op_cost(100).

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.
// Comparison Code
// Maps each Bool test to a condition-code encoding and the mnemonic suffix
// used when printing (l/ge/le/g for the ordering tests).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (the predicate restricts this operand to the lt, ge, le and gt tests).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that can be fixed up with extra conditional jumps
// (the predicate restricts this operand to the ne and eq tests).
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// The predicate excludes the overflow / no_overflow tests, so the last two
// interface entries exist only to complete the COND_INTER table.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// The ordering tests are swapped relative to cmpOp: less is encoded as "g",
// greater_equal as "le", less_equal as "ge" and greater as "l".
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// (Same list as the memory opclass, minus indOffset32X.)
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction unit is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
//
// In each class body, "operand : Sn(read|write)" names the stage in which
// the operand is referenced and "RESOURCE : Sn" or "RESOURCE : Sn(k)" names
// a resource used in stage Sn.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// (note: src is declared with the memory operand class here)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory (no register source operand)
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation (result written to the flags register cr)
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation (result written to the flags register cr)
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation (result written to the flags register cr)
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any decoder, count of 4 (matches instruction_count)
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (no FPU resource is listed for this class)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation with a constant (per the class name)
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters.
//               The encode class name must have been defined in an
//               'enc_class' specification in the encode section of the
//               architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the bytes of an int in place with a single BSWAP.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a long held in a register pair; per the format
// string this is a BSWAP of each half followed by an exchange of the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of an unsigned short: BSWAP the 32-bit register, then a
// logical right shift by 16 (SHR, so the upper half ends up zero).
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a signed short: BSWAP the 32-bit register, then an
// arithmetic right shift by 16 (SAR, so the result is sign-extended).
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros of an int with LZCNT
// (selected when UseCountLeadingZerosInstruction is set).
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of an int with BSR
// (selected when UseCountLeadingZerosInstruction is clear).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // ZF set after BSR: substitute -1 so the arithmetic below yields 32.
    __ movl(Rdst, -1);
    __ bind(skip);
    // result = (BitsPerInt - 1) - Rdst
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long with LZCNT: count in the high word first;
// if the carry flag is set after that, count in the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long with BSR.  The bit index found (plus 32 when
// it came from the high word, or -1 when both words set ZF) is turned into a
// count by computing (BitsPerLong - 1) - index.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with TZCNT
// (selected when UseCountTrailingZerosInstruction is set).
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with BSF; when ZF is set after BSF the
// result is forced to BitsPerInt.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long with TZCNT: count in the low word first;
// if the carry flag is set after that, count in the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long with BSF.  If the low word yields a bit the
// result is its index; otherwise the high-word index (or BitsPerInt when
// that BSF also sets ZF) plus BitsPerInt.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Population count of an int register with POPCNT
// (selected when UsePopCountInstruction is set).
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
// Population count of an int loaded directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Counts each 32-bit half separately and adds the two counts.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Memory form: counts the words at $mem and $mem+4 and adds the two counts.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Naming used in the format strings below: MOVSX8/MOVZX8 are printed for the
// byte-extending loads (movsbl/movzbl) and MOVSX/MOVZX for the 16-bit ones
// (movswl/movzwl); a few of the narrowing forms print MOVSX/MOVZX although
// they emit the byte variant.

// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
// Only the low 8 bits of the mask constant are applied.
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Folds the (x << 24) >> 24 pattern on a loaded short into one
// sign-extending byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Folds the (x << 24) >> 24 pattern on a loaded char into one
// sign-extending byte load.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
// Only the low 16 bits of the mask constant are applied.
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
// The high word is a copy of the low word shifted right arithmetically by 31.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Applies only when the load does not require atomic access; the two words
// are read with separate MOVs from $mem and $mem+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE <= 1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem, dst));
  ins_pipe(fpu_reg_mem);
%}

// Atomic long load through an XMM temporary, result left in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE >= 2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);

  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    XMMRegister xtmp = $tmp$$XMMRegister;
    __ movdbl(xtmp, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), xtmp);
  %}
  ins_pipe(pipe_slow);
%}

// Atomic long load through an XMM temporary, result split into a register
// pair: low word moved out, temporary shifted right by 32, high word moved out.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE >= 2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);

  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    XMMRegister xtmp = $tmp$$XMMRegister;
    Register Rlo = $dst$$Register;
    Register Rhi = HIGH_FROM_LOW(Rlo);
    __ movdbl(xtmp, $mem$$Address);
    __ movdl(Rlo, xtmp);
    __ psrlq(xtmp, 32);
    __ movdl(Rhi, xtmp);
  %}
  ins_pipe(pipe_slow);
%}

// Load Range (matches LoadRange) -- plain 32-bit MOV
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_mem);
%}

// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_mem);
%}

// Load Double onto the x87 stack, then pop it into the destination FPU
// register (used when UseSSE <= 1)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE <= 1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem),
             Pop_Reg_DPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Double to XMM (UseXmmLoadAndClearUpper set)
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE >= 2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Load Double to XMM (UseXmmLoadAndClearUpper clear)
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE >= 2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Load single-precision float to XMM register
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE >= 1);
  match(Set dst (LoadF mem));

  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Load Float onto the x87 stack, then pop it into the destination FPU
// register (used when UseSSE == 0)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE == 0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Effective Address.
// One rule per addressing-mode operand; all of them emit LEA (0x8D).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_reg_fat);
%}

// Load Constant (int immediate)
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode(LdImmI(dst, src));
  ins_pipe(ialu_reg_fat);
%}

// Load Constant zero: XOR the register with itself (kills the flags)
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode(OpcP, RegReg(dst, dst));
  ins_pipe(ialu_reg);
%}

// Load Constant pointer
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode(LdImmP(dst, src));
  ins_pipe(ialu_reg_fat);
%}

// Load Constant long: one immediate move per half
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode(LdImmL_Lo(dst, src), LdImmL_Hi(dst, src));
  ins_pipe(ialu_reg_long_fat);
%}

// Load Constant long zero: XOR each half with itself
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33, 0x33);
  ins_encode(RegReg_Lo(dst, dst), RegReg_Hi(dst, dst));
  ins_pipe(ialu_reg_long);
%}

// Float constant loaded from the constant table onto the x87 stack.
// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // push the constant with FLDZ, then pop it into the destination
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // push the constant with FLD1, then pop it into the destination
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Float constant loaded from the constant table into an XMM register.
// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Float 0.0 in an XMM register: XORPS of the register with itself.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ xorps(xdst, xdst);
  %}
  ins_pipe(pipe_slow);
%}

// Double constant loaded from the constant table onto the x87 stack.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // push the constant with FLDZ, then pop it into the destination
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // push the constant with FLD1, then pop it into the destination
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Double constant loaded from the constant table into an XMM register.
// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);

  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double 0.0 in an XMM register: XORPD of the register with itself.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ xorpd(xdst, xdst);
  %}
  ins_pipe(pipe_slow);
%}

// Load Stack Slot (int)
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

// Load Stack Slot (long): one MOV per 32-bit half
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode(OpcP, RegMem(dst, src),
             OpcS, RegMem_Hi(dst, src));
  ins_pipe(ialu_mem_long_reg);
%}

// Load Stack Slot (pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

// Load Stack Slot (float, into an x87 register)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode(OpcP, RMopc_Mem_no_oop(0x00, src),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Load Stack Slot (double, into an x87 register)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode(OpcP, RMopc_Mem_no_oop(0x00, src),
             Pop_Reg_DPR(dst));
  ins_pipe(fpu_reg_mem);
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The rule is selected by UseSSE and AllocatePrefetchInstr.

instruct prefetchAlloc0(memory mem) %{
  predicate(UseSSE == 0 && AllocatePrefetchInstr != 3);
  match(PrefetchAllocation mem);

  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc(memory mem) %{
  predicate(AllocatePrefetchInstr == 3);
  match(PrefetchAllocation mem);

  ins_cost(100);
  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA(memory mem) %{
  predicate(UseSSE >= 1 && AllocatePrefetchInstr == 0);
  match(PrefetchAllocation mem);

  ins_cost(100);
  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0(memory mem) %{
  predicate(UseSSE >= 1 && AllocatePrefetchInstr == 1);
  match(PrefetchAllocation mem);

  ins_cost(100);
  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2(memory mem) %{
  predicate(UseSSE >= 1 && AllocatePrefetchInstr == 2);
  match(PrefetchAllocation mem);

  ins_cost(100);
  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte (source restricted to xRegI)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short: the secondary opcode byte 0x66 is emitted first,
// then the primary opcode 0x89
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode(OpcS, OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Long (non-atomic): one MOV per 32-bit half
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode(OpcP, RegMem(src, mem),
             OpcS, RegMem_Hi(src, mem));
  ins_pipe(ialu_mem_long_reg);
%}

// Store Long to Integer: only the low half of the pair is written
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr) %{
  predicate(UseSSE <= 1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(KILL cr);

  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode(OpcP, RegMem(EAX, mem), enc_storeL_volatile(mem, src));
  ins_pipe(fpu_reg_mem);
%}

// Atomic long store from a stack slot through an XMM temporary.
// The CMP probes the target address first (implicit null check).
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE >= 2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(TEMP tmp, KILL cr);

  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    XMMRegister xtmp = $tmp$$XMMRegister;
    __ cmpl(rax, $mem$$Address);
    __ movdbl(xtmp, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, xtmp);
  %}
  ins_pipe(pipe_slow);
%}

// Atomic long store from a register pair: both halves are moved into XMM
// temporaries, interleaved with PUNPCKLDQ and written with one MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE >= 2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect(TEMP tmp2, TEMP tmp, KILL cr);

  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    XMMRegister xlo = $tmp$$XMMRegister;
    XMMRegister xhi = $tmp2$$XMMRegister;
    __ cmpl(rax, $mem$$Address);
    __ movdl(xlo, $src$$Register);
    __ movdl(xhi, HIGH_FROM_LOW($src$$Register));
    __ punpckldq(xlo, xhi);
    __ movdbl($mem$$Address, xlo);
  %}
  ins_pipe(pipe_slow);
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode(OpcP, RegMem(src, mem));
  ins_pipe(ialu_mem_reg);
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate (only when UseStoreImmI16 is set)
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode(SizePrefix, OpcP, RMopc_Mem(0x00, mem), Con16(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Double from the x87 stack (UseSSE <= 1)
instruct storeDPR(memory mem, regDPR1 src) %{
  predicate(UseSSE <= 1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode(enc_FPR_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Store double does rounding on x86, so a RoundDouble feeding the store
// is matched into the same FST_D.
instruct storeDPR_rounded(memory mem, regDPR1 src) %{
  predicate(UseSSE <= 1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode(enc_FPR_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Store XMM register to memory (double-precision floating point)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE >= 2);
  match(Set mem (StoreD mem src));

  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE >= 1);
  match(Set mem (StoreF mem src));

  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Store Float from the x87 stack (UseSSE == 0)
instruct storeFPR(memory mem, regFPR1 src) %{
  predicate(UseSSE == 0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Store Float does rounding on x86, so a RoundFloat feeding the store
// is matched into the same FST_S.
instruct storeFPR_rounded(memory mem, regFPR1 src) %{
  predicate(UseSSE == 0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Store Float does rounding on x86, so a ConvD2F feeding the store
// is matched into the same FST_S (source is a double x87 register).
instruct storeFPR_Drounded(memory mem, regDPR1 src) %{
  predicate(UseSSE <= 1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode(enc_FPR_store(mem, src));
  ins_pipe(fpu_mem_reg);
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm(memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32FPR_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm(memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, mem), Con32F_as_bits(src));
  ins_pipe(ialu_mem_imm);
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode(OpcPRegSS(dst, src));
  ins_pipe(ialu_mem_reg);
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode(OpcPRegSS(dst, src));
  ins_pipe(ialu_mem_reg);
%}

// Store Long to stack slot: one MOV per 32-bit half
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode(OpcP, RegMem(src, dst),
             OpcS, RegMem_Hi(src, dst));
  ins_pipe(ialu_mem_long_reg);
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  Every rule except membar_volatile has an empty
// encoding (size 0).

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);

  ins_cost(400);
  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);

  ins_cost(0);
  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);

  ins_cost(400);
  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);

  ins_cost(0);
  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// StoreLoad barrier; emitted through MacroAssembler::membar and kills the flags
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);

  ins_cost(400);
  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Zero-cost variant chosen when Matcher::post_store_load_barrier(n) holds
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));

  ins_cost(0);
  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);

  ins_cost(0);
  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

// int -> pointer cast; both operands are EAX classes and nothing is emitted
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));

  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// pointer -> int cast; a register copy
instruct castP2X(rRegI dst, eRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode(enc_Copy(dst, src));
  ins_pipe(ialu_reg_reg);
%}

//----------Conditional Move---------------------------------------------------

// Conditional move without CMOV support: a short branch on the inverted
// condition jumps over a plain MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Same as jmovI_reg, for the unsigned compare operand / flags classes
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Integer CMOV, register source
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// UCF flavour: expands to cmovI_regU
instruct cmovI_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Integer CMOV, memory source
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Integer CMOV, memory source, unsigned compare
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// UCF flavour: expands to cmovI_memU
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Pointer CMOV, register source
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode(enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Pointer CMOV, register source, unsigned compare
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// UCF flavour: expands to cmovP_regU
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// x87 conditional move, double (unsigned flags, UseSSE <= 1)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE <= 1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode(enc_cmov_dpr(cop, src));
  ins_pipe(pipe_cmovDPR_reg);
%}

// x87 conditional move, float (unsigned flags, UseSSE == 0)
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE == 0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode(enc_cmov_dpr(cop, src));
  ins_pipe(pipe_cmovDPR_reg);
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE <= 1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode(0xdd, 0x3);          /* DD D8+i or DD /3 */
  ins_encode(enc_cmov_branch(cop, 0x4), Push_Reg_DPR(src), OpcP, RegOpc(dst));
  ins_pipe(pipe_cmovDPR_reg);
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE == 0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode(0xdd, 0x3);          /* DD D8+i or DD /3 */
  ins_encode(enc_cmov_branch(cop, 0x4), Push_Reg_FPR(src), OpcP, RegOpc(dst));
  ins_pipe(pipe_cmovDPR_reg);
%}

// No CMOVE with SSE/SSE2: a short branch on the inverted condition jumps
// over a MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE >= 1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

// No CMOVE with SSE/SSE2: a short branch on the inverted condition jumps
// over a MOVSD.  The format text is tagged double, matching the regD
// operands and the CMoveD node this rule matches.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE >= 2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

// unsigned version of fcmovF_regS
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate(UseSSE >= 1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

// UCF flavour: expands to fcmovF_regU
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate(UseSSE >= 1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version of fcmovD_regS (format text tagged double, as above)
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate(UseSSE >= 2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label Ldone;
    // Branch sense is the inverse of the CMOV sense
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Ldone);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

// UCF flavour: expands to fcmovD_regU
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate(UseSSE >= 2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long CMOV: one CMOV per 32-bit half of the register pair
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg_Lo2(dst, src),
             enc_cmov(cop), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Long CMOV, unsigned compare
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cop), RegReg_Lo2(dst, src),
             enc_cmov(cop), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// UCF flavour: expands to cmovL_regU
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov());
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions

// reg += reg
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode(OpcP, RegReg(dst, src));
  ins_pipe(ialu_reg_reg);
%}

// reg += immediate
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}

// reg += 1, as a one-byte INC when UseIncDec is set
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /* */
  ins_encode(Opc_plus(primary, dst));
  ins_pipe(ialu_reg);
%}

// dst = src0 + immediate via LEA (no flags effect declared)
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));

  ins_cost(110);
  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode(OpcP, RegLea(dst, src0, src1));
  ins_pipe(ialu_reg_reg);
%}

// Pointer flavour of leaI_eReg_immI
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));

  ins_cost(110);
  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode(OpcP, RegLea(dst, src0, src1));
  ins_pipe(ialu_reg_reg);
%}

// reg += -1, as a one-byte DEC when UseIncDec is set
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /* */
  ins_encode(Opc_plus(primary, dst));
  ins_pipe(ialu_reg);
%}

// pointer += int register
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode(OpcP, RegReg(dst, src));
  ins_pipe(ialu_reg_reg);
%}

// pointer += immediate
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode(OpcSErm(dst, src), Con8or32(src));
  ins_pipe(ialu_reg);
%}

// reg += [mem]
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode(OpcP, RegMem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

// [mem] += reg (load, add and store of the same location folded into one ADD)
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01);               /* Opcode 01 /r */
  ins_encode(OpcP, RegMem(src, dst));
  ins_pipe(ialu_mem_reg);
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81);               /* Opcode 81 /0 id */
  ins_encode(OpcSE(src), RMopc_Mem(0x00, dst), Con8or32(src));
  ins_pipe(ialu_mem_imm);
%}

// [mem] += 1
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF);               /* Opcode FF /0 */
  ins_encode(OpcP, RMopc_Mem(0x00, dst));
  ins_pipe(ialu_mem_imm);
%}

// [mem] += -1
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF);               /* Opcode FF /1 */
  ins_encode(OpcP, RMopc_Mem(0x01, dst));
  ins_pipe(ialu_mem_imm);
%}

// The three cast rules below match in place and emit no code.
instruct checkCastPP(eRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}

instruct castPP(eRegP dst) %{
  match(Set dst (CastPP dst));

  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}

instruct castII(rRegI dst) %{
  match(Set dst (CastII dst));

  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode(OpcP, RegMem(dst, mem));
  ins_pipe(ialu_reg_mem);
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
instruct storePConditional(memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.

  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode(lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, heap_top_ptr));
  ins_pipe(pipe_cmpxchg);
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
instruct storeIConditional(memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);

  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode(lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem));
  ins_pipe(pipe_cmpxchg);
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // The LOCK prefix is only emitted when os::is_MP() reports true.
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// Long compare-and-swap; res is set to 1 on success and 0 on failure
// (see the format). Requires cmpxchg8 support.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-swap; res is set to 1 on success and 0 on failure.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-swap; res is set to 1 on success and 0 on failure.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a plain (optionally locked) ADD to
// memory is enough.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI with result: (optionally locked) XADD; newval is both the
// addend and the result register.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSetI via XCHG; no flags operand and no explicit lock() call.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSetP via XCHG (pointer flavor of xchgI).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
// dst = dst - src, register with register; flags are killed.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// dst = dst - immediate.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// dst = dst - [src] (load folded into the SUB).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// [dst] = [dst] - src.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches dst + (0 - src) and emits a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Matches 0 - dst and emits NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Loads only the low word of a long constant; it produces the
// eADXRegL_low_only operand that the mulI_imm_*high rules below consume.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the multiplier is a long constant that fits in an int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the multiplier is a long constant that fits in an int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: both int operands are masked with the
// immL_32bits constant before the long multiply.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // The listing shows a guard around IDIV for EAX == 0x80000000 with a
  // divisor of -1, followed by the normal CDQ/IDIV path.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Implemented as a call to SharedRuntime::ldiv with both operands pushed
// on the stack (see the format).
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  // This rule uses the same ins_encode and size(26) as divI_eReg, so the
  // listing shows the same full sequence instead of only CDQ/IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Implemented as a call to SharedRuntime::lrem with both operands pushed
// on the stack (see the format).
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Divides EDX:EAX by a 32-bit constant using unsigned DIV on abs(imm);
// signs of the dividend and of the constant are fixed up around the DIVs.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same scheme as divL_eReg_imm32, but keeps the remainder (EDX after DIV)
// and sign-extends it into EDX:EAX at the end.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right memory by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // Register-only operation: use the same pipe class as the sibling
  // salI_eReg_imm / shrI_eReg_imm rules rather than the memory class.
  ins_pipe( ialu_reg );
%}

// Arithmetic Shift Right memory by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  // (src << 24) >> 24 is replaced by a single sign-extending byte move.
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  // (src << 16) >> 16 is replaced by a single sign-extending word move.
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// All rules below are only selected when UseBMI1Instructions is set.

// ANDN: matches (src1 XOR minus_1) AND src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with the second operand loaded from memory.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches (0 - src) AND src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with the operand loaded from memory.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches (src + minus_1) XOR src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with the operand loaded from memory.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches (src + minus_1) AND src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with the operand loaded from memory.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer register used as an integer (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three rules below have no match rule of their own; they are the
// targets of the expand rules that recognize the rotate idioms.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// Matches (dst << lshift) | (dst >>> rshift) with an immI1 left count and
// an immI_M1 right count, and expands to rolI_eReg_imm1.
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate accepts the pattern only when the two shift constants sum to
// a multiple of 32, i.e. when the shift pair really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once (right-shift count written as 0 - shift)
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once (right-shift count written as 32 - shift)
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// The three rorI_eReg_{imm1,imm8,CL} rules have no match rule of their own;
// they are instantiated from the expand blocks of the rorI_eReg_* match rules.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR 32bit by one once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Same predicate as rolI_eReg_i8: the two shift constants must sum to a
// multiple of 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once (left-shift count written as 0 - shift)
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once (left-shift count written as 32 - shift)
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// Emitted as NOT; this rule declares no flags operand and no KILL effect.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Expand-only helper (no match rule): copies src into dst. Used by convI2B.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper (no match rule): NEG dst followed by ADC dst,src.
// Used by convI2B after dst has been loaded with a copy of src.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: expands to the copy followed by the NEG/ADC pair.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-source counterpart of movI_nocopy. Used by convP2B.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer-source counterpart of ci2b. Used by convP2B.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer: expands to the copy followed by the NEG/ADC pair.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask p q, materialised in dst as XOR / CMP / SETlt / NEG.
// dst is pinned to ECX and p, q exclude ECX, so clearing dst before the
// compare cannot clobber an input.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Straight-line sequence; no branch target is needed here.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: a single arithmetic shift right by 31.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Matches p = ((p < q) ? y : 0) + (p - q): subtract, then add y only when the
// subtraction took the "less" path (JGE skips the ADD).
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Matches y = (p < q) ? y : 0: y is left untouched on the "less" path and
// cleared otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/

//----------Overflow Math Instructions-----------------------------------------
// These rules produce only the flags result (Set cr ...). Where the emitted
// instruction overwrites an input, that input is declared USE_KILL.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow is checked with CMP; this rule declares no effect on
// either operand.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2: emitted as NEG, which overwrites op2 (hence USE_KILL).
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form: the three-operand IMUL writes to a TEMP register, so op1 is
// only a USE.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long Register (matches 0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long forms are built from two 32-bit BMI1 operations on the low and high
// halves (HIGH_FROM_LOW selects the high half). All are guarded by
// UseBMI1Instructions and declare dst as TEMP.

// ANDN: matches (src1 ^ -1) & src2, applied independently to each half.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// ANDN with src2 in memory; the high half is read from displacement + 4.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches (0 - src) & src. dst.hi is preset to 0 and only computed from
// src.hi when the low-half BLSIL falls through the JNZ.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with the source in memory; the high half is read from displacement + 4.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches (src + -1) ^ src. dst.hi is preset to 0 and only computed
// from src.hi when the low-half BLSMSKL leaves carry set (JNC skips it).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with the source in memory; the high half is read from displacement + 4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches (src + -1) & src. dst.hi starts as a copy of src.hi and is
// only recomputed when the low-half BLSRL leaves carry set (JNC skips it).
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with the source in memory; the high half is read from displacement + 4.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// Emitted as NOT on each half; this rule declares no flags operand.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
     __ notl($dst$$Register);
     __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Implemented as one ADD/ADC doubling step; enabled by UseNewLongLShift.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two ADD/ADC doubling steps.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC doubling steps.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
    "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSHR $dst.lo,$cnt-32\n"
          "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
    "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right arithmetic Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSAR $dst.lo,$cnt-32\n"
          "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
    "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Variant producing an eFlagsRegUCF result: same compare, but without the
// NaN fixup sequence and without killing EAX.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Fallback without the supports_cmov() requirement: flags are transferred via
// FNSTSW AX / SAHF, so EAX is killed.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// eFlagsRegUCF variant: UCOMISD only, no fixup sequence.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (second operand loaded from memory)
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// eFlagsRegUCF variant with a memory operand: UCOMISD only, no fixup sequence.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 double subtract: dst = dst - src (UseSSE<=1 only)
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with RoundDouble: the result is popped to a stack slot.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract a double loaded from memory: dst = dst - [src]
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// AbsD on the regDPR1 operand class (both operands); emits only FABS.
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// NegD on the regDPR1 operand class (both operands); emits only FCHS.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add: dst = dst + src (UseSSE<=1 only)
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with RoundDouble: the result is popped to a stack slot.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add a double loaded from memory: dst = dst + [src]
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Matches load / add / round / store on one memory operand. The encoding
// spells its opcodes out explicitly instead of using OpcP/OpcS.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant matched by immDPR1, loaded with FLD1.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant loaded from the constant table. The predicate
// excludes the constants 0.0 and 1.0.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with RoundDouble: the result is popped to a stack slot
// addressed off rsp. The constant sits one level deeper in the match tree,
// hence _kids[0]->_kids[1] in the predicate.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// x87 double multiply: dst = dst * src (UseSSE<=1 only)
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply a double register by a constant-table double.  The predicate
// excludes the constants 0.0 and 1.0.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Multiply a double register by a double loaded from memory (two-address form).
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
// The memory operand is a LoadD, so the load must be the 64-bit form
// (DD /0, FLD m64fp) exactly as in mulDPR_reg_mem; D9 /0 would load only a
// 32-bit float from the double's address.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}

// Two-address double divide: dst = dst / src.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// The rule carries exactly one predicate, and that predicate includes the
// is_strict() test: with ins_cost(1) a bare UseSSE<=1 predicate would let
// this rule undercut the ordinary double divide in non-strict methods.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict double divide whose result is rounded by storing it to a
// double stack slot.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Double remainder on the x87 stack.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for XMM operands; the format shows the values being
// bounced through the stack and the x87 FPREM loop.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack" %}
  ins_cost(250);
ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// Tangent of the x87 top-of-stack value.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

// Tangent of an XMM double, computed with the same x87 byte sequence.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),    // fstp st
              Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Arc tangent on the x87 stack.  The listing mnemonic is DATAN, following the
// D-prefixed naming of the sibling rules (DTAN, DSQRT, DMOD).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATAN $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Arc tangent for XMM doubles.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATAN $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Square root on the x87 stack: push src, emit the two opcode bytes
// (secondary first, then primary), pop into dst.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

// Base-10 logarithm of the x87 top-of-stack value.
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)" %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x
  ins_pipe( pipe_slow );
%}

instruct
log10D_reg(regD dst, regD src, eFlagsReg cr) %{ predicate (UseSSE>=2); effect(KILL cr); match(Set dst (Log10D src)); // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number // fyl2x ; compute log_10(2) * log_2(x) format %{ "FLDLG2 \t\t\t#Log10\n\t" "FYL2X \t\t\t# Q=Log10*Log_2(x)" %} ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 Push_SrcD(src), Opcode(0xD9), Opcode(0xF1), // fyl2x Push_ResultD(dst)); ins_pipe( pipe_slow ); %} //-------------Float Instructions------------------------------- // Float Math // Code for float compare: // fcompp(); // fwait(); fnstsw_ax(); // sahf(); // movl(dst, unordered_result); // jcc(Assembler::parity, exit); // movl(dst, less_result); // jcc(Assembler::below, exit); // movl(dst, equal_result); // jcc(Assembler::equal, exit); // movl(dst, greater_result); // exit: // P6 version of float compare, sets condition codes in EFLAGS instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ predicate(VM_Version::supports_cmov() && UseSSE == 0); match(Set cr (CmpF src1 src2)); effect(KILL rax); ins_cost(150); format %{ "FLD $src1\n\t" "FUCOMIP ST,$src2 // P6 instruction\n\t" "JNP exit\n\t" "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" "SAHF\n" "exit:\tNOP // avoid branch to branch" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), cmpF_P6_fixup ); ins_pipe( pipe_slow ); %} instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ predicate(VM_Version::supports_cmov() && UseSSE == 0); match(Set cr (CmpF src1 src2)); ins_cost(100); format %{ "FLD $src1\n\t" "FUCOMIP ST,$src2 // P6 instruction" %} opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2)); ins_pipe( pipe_slow ); %} // Compare & branch instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ predicate(UseSSE == 0); match(Set cr (CmpF src1 src2)); effect(KILL rax); ins_cost(200); format %{ "FLD $src1\n\t" "FCOMp $src2\n\t" "FNSTSW 
AX\n\t" "TEST AX,0x400\n\t" "JZ,s flags\n\t" "MOV AH,1\t# unordered treat as LT\n" "flags:\tSAHF" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), fpu_flags); ins_pipe( pipe_slow ); %} // Compare vs zero into -1,0,1 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 zero)); effect(KILL cr, KILL rax); ins_cost(280); format %{ "FTSTF $dst,$src1" %} opcode(0xE4, 0xD9); ins_encode( Push_Reg_DPR(src1), OpcS, OpcP, PopFPU, CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // Compare into -1,0,1 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 src2)); effect(KILL cr, KILL rax); ins_cost(300); format %{ "FCMPF $dst,$src1,$src2" %} opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ ins_encode( Push_Reg_DPR(src1), OpcP, RegOpc(src2), CmpF_Result(dst)); ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ predicate(UseSSE>=1); match(Set cr (CmpF src1 src2)); ins_cost(145); format %{ "UCOMISS $src1,$src2\n\t" "JNP,s exit\n\t" "PUSHF\t# saw NaN, set CF\n\t" "AND [rsp], #0xffffff2b\n\t" "POPF\n" "exit:" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); emit_cmpfp_fixup(_masm); %} ins_pipe( pipe_slow ); %} instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ predicate(UseSSE>=1); match(Set cr (CmpF src1 src2)); ins_cost(100); format %{ "UCOMISS $src1,$src2" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe( pipe_slow ); %} // float compare and set condition codes in EFLAGS by XMM regs instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ predicate(UseSSE>=1); match(Set cr (CmpF src1 (LoadF src2))); ins_cost(165); format %{ "UCOMISS $src1,$src2\n\t" "JNP,s exit\n\t" "PUSHF\t# saw NaN, set CF\n\t" "AND [rsp], 
#0xffffff2b\n\t" "POPF\n" "exit:" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$Address); emit_cmpfp_fixup(_masm); %} ins_pipe( pipe_slow ); %} instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ predicate(UseSSE>=1); match(Set cr (CmpF src1 (LoadF src2))); ins_cost(100); format %{ "UCOMISS $src1,$src2" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$Address); %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ predicate(UseSSE>=1); match(Set dst (CmpF3 src1 src2)); effect(KILL cr); ins_cost(255); format %{ "UCOMISS $src1, $src2\n\t" "MOV $dst, #-1\n\t" "JP,s done\n\t" "JB,s done\n\t" "SETNE $dst\n\t" "MOVZB $dst, $dst\n" "done:" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); emit_cmpfp3(_masm, $dst$$Register); %} ins_pipe( pipe_slow ); %} // Compare into -1,0,1 in XMM and memory instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ predicate(UseSSE>=1); match(Set dst (CmpF3 src1 (LoadF src2))); effect(KILL cr); ins_cost(275); format %{ "UCOMISS $src1, $src2\n\t" "MOV $dst, #-1\n\t" "JP,s done\n\t" "JB,s done\n\t" "SETNE $dst\n\t" "MOVZB $dst, $dst\n" "done:" %} ins_encode %{ __ ucomiss($src1$$XMMRegister, $src2$$Address); emit_cmpfp3(_masm, $dst$$Register); %} ins_pipe( pipe_slow ); %} // Spill to obtain 24-bit precision instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (SubF src1 src2)); format %{ "FSUB $dst,$src1 - $src2" %} opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ ins_encode( Push_Reg_FPR(src1), OpcReg_FPR(src2), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits instruct subFPR_reg(regFPR dst, regFPR src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (SubF dst src)); format %{ "FSUB $dst,$src" %} 
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ ins_encode( Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} // Spill to obtain 24-bit precision instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); format %{ "FADD $dst,$src1,$src2" %} opcode(0xD8, 0x0); /* D8 C0+i */ ins_encode( Push_Reg_FPR(src2), OpcReg_FPR(src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits instruct addFPR_reg(regFPR dst, regFPR src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF dst src)); format %{ "FLD $src\n\t" "FADDp $dst,ST" %} opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ ins_encode( Push_Reg_FPR(src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_reg ); %} instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ predicate(UseSSE==0); match(Set dst (AbsF src)); ins_cost(100); format %{ "FABS" %} opcode(0xE1, 0xD9); ins_encode( OpcS, OpcP ); ins_pipe( fpu_reg_reg ); %} instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ predicate(UseSSE==0); match(Set dst (NegF src)); ins_cost(100); format %{ "FCHS" %} opcode(0xE0, 0xD9); ins_encode( OpcS, OpcP ); ins_pipe( fpu_reg_reg ); %} // Cisc-alternate to addFPR_reg // Spill to obtain 24-bit precision instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 (LoadF src2))); format %{ "FLD $src2\n\t" "FADD ST,$src1\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), OpcReg_FPR(src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // // Cisc-alternate to addFPR_reg // This instruction does not round to 24-bits instruct addFPR_reg_mem(regFPR dst, memory src) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF dst (LoadF src))); 
format %{ "FADD $dst,$src" %} opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), OpcP, RegOpc(dst) ); ins_pipe( fpu_reg_mem ); %} // // Following two instructions for _222_mpegaudio // Spill to obtain 24-bit precision instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); format %{ "FADD $dst,$src1,$src2" %} opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), OpcReg_FPR(src2), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // Cisc-spill variant // Spill to obtain 24-bit precision instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 (LoadF src2))); format %{ "FADD $dst,$src1,$src2 cisc" %} opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src1 src2)); format %{ "FADD $dst,$src1,$src2" %} opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (AddF src con)); format %{ "FLD $src\n\t" "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" "FSTP_S $dst" %} ins_encode %{ __ fld_s($src$$reg - 1); // FLD ST(i-1) 
__ fadd_s($constantaddress($con)); __ fstp_s(Address(rsp, $dst$$disp)); %} ins_pipe(fpu_mem_reg_con); %} // // This instruction does not round to 24-bits instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF src con)); format %{ "FLD $src\n\t" "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" "FSTP $dst" %} ins_encode %{ __ fld_s($src$$reg - 1); // FLD ST(i-1) __ fadd_s($constantaddress($con)); __ fstp_d($dst$$reg); %} ins_pipe(fpu_reg_reg_con); %} // Spill to obtain 24-bit precision instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); format %{ "FLD $src1\n\t" "FMUL $src2\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ ins_encode( Push_Reg_FPR(src1), OpcReg_FPR(src2), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_reg ); %} // // This instruction does not round to 24-bits instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); format %{ "FLD $src1\n\t" "FMUL $src2\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x1); /* D8 C8+i */ ins_encode( Push_Reg_FPR(src2), OpcReg_FPR(src1), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_reg ); %} // Spill to obtain 24-bit precision // Cisc-alternate to reg-reg multiply instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 (LoadF src2))); format %{ "FLD_S $src2\n\t" "FMUL $src1\n\t" "FSTP_S $dst" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), OpcReg_FPR(src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_reg_mem ); %} // // This instruction does not round to 24-bits // Cisc-alternate to reg-reg multiply instruct 
mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 (LoadF src2))); format %{ "FMUL $dst,$src1,$src2" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), OpcReg_FPR(src1), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_mem ); %} // Spill to obtain 24-bit precision instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src1 src2)); format %{ "FMUL $dst,$src1,$src2" %} opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), set_instruction_start, OpcP, RMopc_Mem(secondary,src1), Pop_Mem_FPR(dst) ); ins_pipe( fpu_mem_mem_mem ); %} // Spill to obtain 24-bit precision instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); match(Set dst (MulF src con)); format %{ "FLD $src\n\t" "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" "FSTP_S $dst" %} ins_encode %{ __ fld_s($src$$reg - 1); // FLD ST(i-1) __ fmul_s($constantaddress($con)); __ fstp_s(Address(rsp, $dst$$disp)); %} ins_pipe(fpu_mem_reg_con); %} // // This instruction does not round to 24-bits instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (MulF src con)); format %{ "FLD $src\n\t" "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" "FSTP $dst" %} ins_encode %{ __ fld_s($src$$reg - 1); // FLD ST(i-1) __ fmul_s($constantaddress($con)); __ fstp_d($dst$$reg); %} ins_pipe(fpu_reg_reg_con); %} // // MACRO1 -- subsume unshared load into mulFPR // This instruction does not round to 24-bits instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); match(Set dst (MulF (LoadF mem1) src)); format %{ "FLD $mem1 ===MACRO1===\n\t" "FMUL ST,$src\n\t" "FSTP $dst" %} opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), OpcReg_FPR(src), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_reg_mem ); %} // // MACRO2 -- addFPR a mulFPR which subsumed an unshared load // This instruction does not round to 24-bits instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); ins_cost(95); format %{ "FLD $mem1 ===MACRO2===\n\t" "FMUL ST,$src1 subsume mulFPR left load\n\t" "FADD ST,$src2\n\t" "FSTP $dst" %} opcode(0xD9); /* LoadF D9 /0 */ ins_encode( OpcP, RMopc_Mem(0x00,mem1), FMul_ST_reg(src1), FAdd_ST_reg(src2), Pop_Reg_FPR(dst) ); ins_pipe( fpu_reg_mem_reg_reg ); %} // MACRO3 -- addFPR a mulFPR // This instruction does not round to 24-bits. It is a '2-address' // instruction in that the result goes back to src2. This eliminates // a move from the macro; possibly the register allocator will have // to add it back (and maybe not). 
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// Fused (src2 - src1) / src3; no rounding to 24 bits is performed.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Float divide in 24-bit mode: the quotient is spilled to a float stack
// slot to obtain 24-bit precision.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

//
// Two-address float divide; no rounding to 24 bits is performed.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Float remainder in 24-bit mode: the result is spilled to a float stack
// slot to obtain 24-bit precision.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode(
Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

//
// Two-address float remainder; no rounding to 24 bits is performed.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Float remainder for XMM operands; the format shows the values being
// bounced through the stack and the x87 FPREM loop.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack" %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}

//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
// Round an x87 float by storing it to a float stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);

  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double by storing it to a double stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);

  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Double -> float on the x87 stack: expands to the single-precision
// store in roundFloat_mem_reg, which forces the rounding.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));

  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Double (x87) -> float (XMM): store single precision to a scratch stack
// word, then reload it into the XMM register.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );

  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg == FPR1L_enc) {
      // Source encodes as FPR1L: store it directly without popping.
      __ fst_s(Address(rsp, 0));
    } else {
      // Otherwise push a copy of the source and pop it into the scratch word.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));

  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double between two x87 registers.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));

  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float (x87) -> double stack slot; expands to roundDouble_mem_reg.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));

  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct
convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  // Float (XMM) -> double (x87): spill the float to a scratch stack word,
  // load it onto the FPU stack, then pop it into dst.
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );

  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double entirely in XMM registers.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));

  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );

  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );

  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label L_fast;

    // Fast path: any result other than 0x80000000 is taken as final.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, L_fast);

    // Slow path: put the source on the FPU stack via a scratch stack slot
    // and call the d2i wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));

    __ bind(L_fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 double to a long in EDX:EAX.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );

  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );

  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label L_fast;

    // Move the XMM source onto the FPU stack through a scratch stack slot
    // and store it back as a 64-bit integer under the truncating control word.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));

    // Restore the rounding mode, mask the exception
    if (!Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }

    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);

    // Only the pattern EDX == 0x80000000 && EAX == 0 takes the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, L_fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, L_fast);

    // Slow path: reload the source onto the FPU stack and call the wrapper.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));

    __ bind(L_fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
// Float -> int (ConvF2I) when UseSSE==0; the source lives on the x87 stack.
// Encoded through Push_Reg_FPR followed by the shared DPR2I_encoding class.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// Fast path is a truncating CVTTSS2SI.  If the result register holds
// 0x80000000 afterwards, the source is copied through the CPU stack onto the
// x87 stack and d2i_wrapper is called.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    Address     scratch(rsp, 0);
    Label       converted;

    // Fast path: truncating convert, then check for the sentinel value.
    __ cvttss2sil(result, value);
    __ cmpl(result, 0x80000000);
    __ jccb(Assembler::notEqual, converted);

    // Slow path: hand the float to the stub on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(scratch, value);
    __ fld_s(scratch);
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));

    __ bind(converted);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> long (ConvF2L) when UseSSE==0; the source lives on the x87 stack.
// Encoded through Push_Reg_FPR followed by the shared DPR2L_encoding class.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Float -> long (ConvF2L) when UseSSE>=1.
// The XMM source is spilled to the CPU stack, loaded onto the x87 stack and
// stored back as a 64-bit integer with the truncating control word.  The
// result is popped into EAX/EDX; if it equals 0x80000000:00000000 the source
// is reloaded onto the x87 stack and d2l_wrapper is called.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the float onto the x87 stack and call the stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int (stack slot) -> double on the x87 stack (ConvI2D) when UseSSE<=1.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int register -> double in XMM via CVTSI2SD (UseSSE>=2, UseXmmI2D off).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int loaded from memory -> double in XMM via CVTSI2SD (UseSSE>=2).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int register -> double in XMM via MOVD + CVTDQ2PD (UseSSE>=2, UseXmmI2D on).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int loaded from memory -> double on the x87 stack (UseSSE<=1 and not
// selecting 24-bit instructions).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// The predicate restricts this rule to inputs of the form (AndI x 255).
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// Selected for UseSSE==1, or for UseSSE>=2 when UseXmmI2F is off.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int register -> float in XMM via MOVD + CVTDQ2PS (UseSSE>=2, UseXmmI2F on).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extending int -> long (ConvI2L): copy into both halves, then
// arithmetic-shift the high half by 31 (per the format text).
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double (ConvL2D) through the x87 stack into a stack slot (UseSSE<=1).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> double (ConvL2D) through the x87 stack into an XMM register (UseSSE>=2).
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long -> float (ConvL2F) through the x87 stack into an XMM register (UseSSE>=1).
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long -> float (ConvL2F) through the x87 stack into a stack slot.
// NOTE(review): unlike convL2DPR_reg this rule carries no UseSSE predicate;
// confirm that is intended.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int (ConvL2I): copy the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I: float stack slot -> int register (plain 32-bit load).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// MoveF2I: x87 float register -> int stack slot (UseSSE==0).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// MoveF2I: XMM float register -> int stack slot (UseSSE>=1).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveF2I: XMM float register -> int register via MOVD (UseSSE>=2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F: int register -> float stack slot (plain 32-bit store).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}

// MoveI2F: int stack slot -> x87 float register (UseSSE==0).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// MoveI2F: int stack slot -> XMM float register (UseSSE>=1).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F: int register -> XMM float register via MOVD (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L: double stack slot -> long register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// MoveD2L: x87 double register -> long stack slot (UseSSE<=1).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// MoveD2L: XMM double register -> long stack slot (UseSSE>=2).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L: XMM double register -> long register pair (UseSSE>=2).
// The low half is moved directly; the high half goes through tmp after a
// PSHUFLW with immediate 0x4E.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D: long register pair -> double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

// MoveL2D: long stack slot -> x87 double register (UseSSE<=1).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// MoveL2D: long stack slot -> XMM double register
// (UseSSE>=2 with UseXmmLoadAndClearUpper on).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D: long stack slot -> XMM double register
// (UseSSE>=2 with UseXmmLoadAndClearUpper off).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D: long register pair -> XMM double register (UseSSE>=2).
// Each half is moved with MOVD and the two are merged with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// ClearArray when UseFastStosb is off; the sequence is emitted by clear_mem.
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,1\t# Convert doublewords to words\n\t"
            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ClearArray when UseFastStosb is on; the sequence is emitted by clear_mem.
instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,3\t# Convert doublewords to bytes\n\t"
            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, LL encoding.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, UU encoding.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, LU encoding.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp intrinsic, UL encoding.  The operands are bound to the opposite
// fixed registers and passed to string_compare in swapped order.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// StrIndexOf with a constant substring length, LL encoding (SSE4.2 intrinsics).
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// StrIndexOf with a constant substring length, UU encoding.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// StrIndexOf with a constant substring length, UL encoding.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a variable substring length, LL encoding.
// string_indexof receives -1 in place of a constant count.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a variable substring length, UU encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOf with a variable substring length, UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOfChar intrinsic (SSE4.2 intrinsics); emitted by string_indexof_char.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// AryEq intrinsic, LL encoding.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// AryEq intrinsic, UU encoding.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// HasNegatives intrinsic; emitted by has_negatives.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// CmpI register/register: opcode 3B /r.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpI register/immediate: opcode 81 /7 with an 8- or 32-bit constant.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// CmpI against zero: TEST of the register with itself (opcode 85).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared against zero: TEST register,imm32 (opcode F7 /0).
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & [mem]) compared against zero: TEST register,memory (opcode 85).
// The memory operand is matched through an explicit LoadI, like the other
// register/memory compare rules in this section.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpU register/immediate: opcode 81 /7 with an 8- or 32-bit constant.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// CmpU against zero: TEST of the register with itself (opcode 85).
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpP register/immediate pointer: opcode 81 /7.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  // NOTE(review): 'zero' is declared immI0 although the match rule is a
  // pointer compare (CmpP); the register form testP_reg uses immP0.
  // Presumably this keeps the rule from ever matching -- confirm before
  // changing the operand type, since that would enable a new encoding.
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// dst = MinI(dst, src); the flags register is declared killed.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// dst = MaxI(dst, src); the flags register is declared killed.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes, per the format string,
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit intermediate held in limit:limit_hi (EAX:EDX operand
// classes). limit_hi and tmp are TEMPs and the flags are killed.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Strides of +/-1 are not expected to reach this rule.
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1) with sign extension, then negate
      // the 64-bit value so the division below uses the positive divisor
      // -stride.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through a table located at $constantaddress, indexed by
// switch_val with scale times_1.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// (counted-loop back branch, signed flags)
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// (counted-loop back branch, unsigned flags)
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Counted-loop back branch on eFlagsRegUCF flags; costed lower (200) than
// the plain unsigned form above.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch on eFlagsRegUCF flags; costed lower (200) than jmpConU.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-branch form for cmpOpUCF2 conditions, which must also consult the
// parity flag:
//   notEqual: branch to the target on parity OR on notEqual;
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Any other condition code is a bug (ShouldNotReachHere).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant whose consumer only needs the flags: matches the check compared
// against the null pointer constant, so the result register is merely
// killed and the primary opcode selects "no XOR" in the shared encoding.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
// Jump Direct - Label defines a relative address from JMP+1
// Short form of jmpDir: emitted with jmpb and declared as 2 bytes.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon (jccb, 2 bytes).
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd (jccb, 2 bytes).
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEndU (jccb, 2 bytes).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpLoopEndUCF (jccb, 2 bytes).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU (jccb, 2 bytes).
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF (jccb, 2 bytes).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: two short branches, declared as 4 bytes total.
//   notEqual: branch to the target on parity OR on notEqual;
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Any other condition code is a bug (ShouldNotReachHere).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// dst ends up as -1, 0 or +1: the high halves are compared signed, and only
// if they are equal are the low halves compared unsigned (below).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
    "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
    "m_one:\tDEC $dst\n"
     "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against long zero: only the high half is tested against itself.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this rule to Bool tests lt and ge; it expands to
// the ordinary jmpCon.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register source: two CMOVs, one per 32-bit half. Requires cmov support
// and an lt/ge test.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory source (LoadL src): two CMOVs, the second through RegMem_Hi.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Register source.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory source (LoadI src).
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer conditional move on a long LT/GE compare (register source).
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// In the four floating-point rules below the UseSSE test must guard BOTH
// condition codes. '&&' binds tighter than '||', so the two Bool tests are
// explicitly parenthesized; without the parentheses the GE case would be
// accepted regardless of UseSSE.

// Compare 2 longs and CMOVE doubles
// x87 double registers (regDPR), selected when UseSSE<=1.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// regD operands, selected when UseSSE>=2.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// x87 float registers (regFPR), selected when UseSSE==0.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// regF operands, selected when UseSSE>=1.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare against long zero: OR the two halves into a temp.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Register-register form: compare the low halves and, only if they are
// equal, the high halves.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this rule to Bool tests eq and ne.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register source: two CMOVs, one per 32-bit half. Requires cmov support
// and an eq/ne test.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory source (LoadL src): two CMOVs, the second through RegMem_Hi.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer conditional move on a long EQ/NE compare (register source).
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory source (LoadI src).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// In the four floating-point rules below the UseSSE test must guard BOTH
// condition codes. '&&' binds tighter than '||', so the two Bool tests are
// explicitly parenthesized; without the parentheses the NE case would be
// accepted regardless of UseSSE.

// Compare 2 longs and CMOVE doubles
// x87 double registers (regDPR), selected when UseSSE<=1.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// regD operands, selected when UseSSE>=2.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// x87 float registers (regFPR), selected when UseSSE==0.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// regF operands, selected when UseSSE>=1.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The flags were produced with swapped operands, hence the commuted
// condition operand (cmpOp_commute).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Register source: two CMOVs, one per 32-bit half.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory source (LoadL src): two CMOVs, the second through RegMem_Hi.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Register source. Requires cmov support and an le/gt test; the condition
// operand is the commuted form (cmpOp_commute).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory source (LoadI src).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer register, keyed on the flags produced by a
// long compare (flagsReg_long_LEGT). Requires CMOV support and an LE or GT
// Bool test.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// NOTE on the four floating-point CMOVE predicates below: the le/gt
// alternatives are parenthesized so that the UseSSE guard applies to both of
// them. '&&' binds tighter than '||', so without the parentheses a
// BoolTest::gt test would satisfy the predicate regardless of UseSSE.

// Compare 2 longs and CMOVE doubles
// Selected when UseSSE<=1; expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// Selected when UseSSE>=2; expands to fcmovD_regS.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Selected when UseSSE==0; expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats
// Selected when UseSSE>=1; expands to fcmovF_regS.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf runtime call matched on CallLeafNoFP; its encoding is only
// Java_To_Runtime (none of the FPU pre/post-call encodings used above).
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// FastLock variant chosen when the current compilation uses RTM. Passes the
// extra cx1/cx2 temporaries and the RTM counters/method data to fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  match(Set cr (FastLock object box));
  predicate(Compile::current()->use_rtm());
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $tmp$$Register,
                 $scr$$Register,
                 $cx1$$Register,
                 $cx2$$Register,
                 _counters,
                 _rtm_counters,
                 _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true,
                 ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock variant chosen when the current compilation does not use RTM.
// The RTM-only arguments of fast_lock are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  match(Set cr (FastLock object box));
  predicate(!Compile::current()->use_rtm());
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $tmp$$Register,
                 $scr$$Register,
                 noreg,
                 noreg,
                 _counters,
                 NULL,
                 NULL,
                 NULL,
                 false,
                 false);
  %}
  ins_pipe(pipe_slow);
%}

// FastUnlock; forwards the compilation's use_rtm() setting to fast_unlock.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register,
                   $box$$Register,
                   $tmp$$Register,
                   ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);
  ins_cost(125);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
  size(6);
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    // Load the current thread into the destination register.
    __ get_thread(as_Register($dst$$reg));
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.
// An individual peephole can be enabled, and all others disabled, by using
// -XX:OptoPeepholeAt=# on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in the same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// The rule below is the only peephole in this group that is not commented
// out. It matches a loadI (instruction 0, the root) that directly follows a
// storeI (instruction 1) when the store's source register equals the load's
// destination register and both use the same memory operand; the pair is
// replaced by the storeI alone, built from the original store's operands.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.