//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (per this macro, the pair's high half lives 2 OptoReg slots above the low half)
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Round 'adr' down to the nearest 16-byte boundary and store the 128-bit
// value {lo, hi} there.  The caller must supply a buffer with at least one
// extra 16-byte quadword of slack (see fp_signmask_pool below), since up to
// 15 bytes may be lost to the rounding.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each entry is seeded at index i*2 (i = 1..4) so that rounding down to a
// 16-byte boundary never overlaps the previous entry's 128-bit slot.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes of "reset" instructions emitted immediately before a call:
// 6 bytes for FLDCW when the method runs in 24-bit FP mode, plus 3 bytes for
// VZEROUPPER when the compiled code uses vectors wider than 16 bytes.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
302 int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304 } 305 306 int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308 } 309 310 static int sizeof_FFree_Float_Stack_All = -1; 311 312 int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 315 } 316 317 // Indicate if the safepoint node needs the polling page as an input. 318 // Since x86 does have absolute addressing, it doesn't. 319 bool SafePointNode::needs_polling_address_input() { 320 return false; 321 } 322 323 // 324 // Compute padding required for nodes which need alignment 325 // 326 327 // The address of the call instruction needs to be 4-byte aligned to 328 // ensure that it does not span a cache line so that it can be patched. 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return round_to(current_offset, alignment_required()) - current_offset; 333 } 334 335 // The address of the call instruction needs to be 4-byte aligned to 336 // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;                       // skip MOV instruction
  current_offset += 1;                       // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a single ModR/M (or SIB) byte: f1 = mod (bits 7:6), f2 = reg/opcode
// extension (bits 5:3), f3 = r/m (bits 2:0).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' adjusts the relocation point relative to the instruction mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // An embedded oop immediate must be a valid, non-scavengable oop (the
  // non_oop_word sentinel and NULL are exempt).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits 'opcode' followed by a ModR/M + SIB + displacement addressing
// [ESP+disp]; an 8-bit displacement form is used when disp fits in a byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // 8-bit displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // 32-bit displacement
  }
}

// emit_reg_mem: encode a register-to-memory operand (rRegI ereg, memory mem).
// Emits the ModR/M byte, optional SIB byte, and optional 8/32-bit
// displacement for [base + index*scale + displace].  Conventions:
//  - index == 0x4 means "no index" (0x4 is unencodable as an index);
//  - base  == -1  means an absolute 32-bit address (disp32-only form);
//  - a displacement with a relocation always uses the 32-bit form.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a MOV reg,reg (0x8B) unless source and destination encodings are the
// same, in which case nothing is emitted at all.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch up EFLAGS after a comiss/ucomiss so that an unordered (NaN)
// comparison reads as 'less than'.  Skipped entirely when PF is clear
// (ordered result).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way result of a preceding FP compare into 'dst':
// -1 if unordered (PF set) or below, otherwise 0 if equal and 1 if greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintAssembly style output.
// Mirrors the code generated by MachPrologNode::emit (via verified_entry).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the actual frame setup (stack bang, EBP save,
  // frame allocation, optional 24-bit FPU control word load).
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // pop EBP

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST EAX,[polling_page] with a poll_return relocation.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the epilog; the accounting here must match what emit()
// above produces for the same Compile state.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;  // add ESP, imm32
  } else {
    size += framesize ? 3 : 0;  // add ESP, imm8
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register-class categories used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a single-register load/store to [ESP+offset].
// With a CodeBuffer it emits code; otherwise, unless do_size is set, it
// prints the disassembly to 'st'.  Returns the accumulated byte size:
// 'size' plus opcode + ModR/M + SIB + 0/1/4 displacement bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit/print/size an XMM<->stack move.  A double move is recognized by
// reg_lo/reg_hi being an adjacent pair.  Size accounting must track the
// encoding actually produced (SSE prefix vs. VEX vs. EVEX).
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // EVEX can compress the displacement to one byte; query the assembler
  // for whether this offset fits in a compressed disp8.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size an XMM-to-XMM register move (float or adjacent-pair double).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a GPR-to-XMM move (MOVD).  32-bit only; src_hi/dst_hi are
// unused here (the caller asserts no 64-bit int<->float moves).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // 4 bytes for the SSE encoding, 6 with the EVEX prefix.
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size an XMM-to-GPR move (MOVD), the inverse of the helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size a GPR-to-GPR move (MOV r32,r/m32): 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to the stack.  If the value is not already on
// the top of the FP stack it is first FLD'ed there and stored with FSTP
// (store-and-pop); otherwise FST (store, no pop) is used so TOS is preserved.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op is only used by impl_helper as a register index for its listing;
  // the FSTP/FST opcodes themselves do not encode a GPR.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op     = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op     = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
952 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 953 int src_hi, int dst_hi, uint ireg, outputStream* st); 954 955 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 956 int stack_offset, int reg, uint ireg, outputStream* st); 957 958 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 959 int dst_offset, uint ireg, outputStream* st) { 960 int calc_size = 0; 961 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 962 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 963 switch (ireg) { 964 case Op_VecS: 965 calc_size = 3+src_offset_size + 3+dst_offset_size; 966 break; 967 case Op_VecD: 968 calc_size = 3+src_offset_size + 3+dst_offset_size; 969 src_offset += 4; 970 dst_offset += 4; 971 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 972 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 973 calc_size += 3+src_offset_size + 3+dst_offset_size; 974 break; 975 case Op_VecX: 976 case Op_VecY: 977 case Op_VecZ: 978 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 979 break; 980 default: 981 ShouldNotReachHere(); 982 } 983 if (cbuf) { 984 MacroAssembler _masm(cbuf); 985 int offset = __ offset(); 986 switch (ireg) { 987 case Op_VecS: 988 __ pushl(Address(rsp, src_offset)); 989 __ popl (Address(rsp, dst_offset)); 990 break; 991 case Op_VecD: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 __ pushl(Address(rsp, src_offset+4)); 995 __ popl (Address(rsp, dst_offset+4)); 996 break; 997 case Op_VecX: 998 __ movdqu(Address(rsp, -16), xmm0); 999 __ movdqu(xmm0, Address(rsp, src_offset)); 1000 __ movdqu(Address(rsp, dst_offset), xmm0); 1001 __ movdqu(xmm0, Address(rsp, -16)); 1002 break; 1003 case Op_VecY: 1004 __ vmovdqu(Address(rsp, -32), xmm0); 1005 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1006 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1007 
__ vmovdqu(xmm0, Address(rsp, -32)); 1008 case Op_VecZ: 1009 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1010 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1011 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1012 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1013 break; 1014 default: 1015 ShouldNotReachHere(); 1016 } 1017 int size = __ offset() - offset; 1018 assert(size == calc_size, "incorrect size calculattion"); 1019 return size; 1020 #ifndef PRODUCT 1021 } else if (!do_size) { 1022 switch (ireg) { 1023 case Op_VecS: 1024 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1025 "popl [rsp + #%d]", 1026 src_offset, dst_offset); 1027 break; 1028 case Op_VecD: 1029 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1030 "popq [rsp + #%d]\n\t" 1031 "pushl [rsp + #%d]\n\t" 1032 "popq [rsp + #%d]", 1033 src_offset, dst_offset, src_offset+4, dst_offset+4); 1034 break; 1035 case Op_VecX: 1036 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1037 "movdqu xmm0, [rsp + #%d]\n\t" 1038 "movdqu [rsp + #%d], xmm0\n\t" 1039 "movdqu xmm0, [rsp - #16]", 1040 src_offset, dst_offset); 1041 break; 1042 case Op_VecY: 1043 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1044 "vmovdqu xmm0, [rsp + #%d]\n\t" 1045 "vmovdqu [rsp + #%d], xmm0\n\t" 1046 "vmovdqu xmm0, [rsp - #32]", 1047 src_offset, dst_offset); 1048 case Op_VecZ: 1049 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #64]", 1053 src_offset, dst_offset); 1054 break; 1055 default: 1056 ShouldNotReachHere(); 1057 } 1058 #endif 1059 } 1060 return calc_size; 1061 } 1062 1063 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1064 // Get registers to move 1065 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1066 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1067 OptoReg::Name 
dst_second = ra_->get_reg_second(this ); 1068 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1069 1070 enum RC src_second_rc = rc_class(src_second); 1071 enum RC src_first_rc = rc_class(src_first); 1072 enum RC dst_second_rc = rc_class(dst_second); 1073 enum RC dst_first_rc = rc_class(dst_first); 1074 1075 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1076 1077 // Generate spill code! 1078 int size = 0; 1079 1080 if( src_first == dst_first && src_second == dst_second ) 1081 return size; // Self copy, no move 1082 1083 if (bottom_type()->isa_vect() != NULL) { 1084 uint ireg = ideal_reg(); 1085 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1086 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1087 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1088 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1089 // mem -> mem 1090 int src_offset = ra_->reg2offset(src_first); 1091 int dst_offset = ra_->reg2offset(dst_first); 1092 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1093 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1094 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1095 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1096 int stack_offset = ra_->reg2offset(dst_first); 1097 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1098 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1099 int stack_offset = ra_->reg2offset(src_first); 1100 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1101 } else { 1102 ShouldNotReachHere(); 1103 } 1104 } 1105 1106 // -------------------------------------- 1107 // Check for mem-mem move. push/pop to move. 
1108 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1109 if( src_second == dst_first ) { // overlapping stack copy ranges 1110 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1111 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1112 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1113 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1114 } 1115 // move low bits 1116 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1117 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1118 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1119 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1120 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1121 } 1122 return size; 1123 } 1124 1125 // -------------------------------------- 1126 // Check for integer reg-reg copy 1127 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1128 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1129 1130 // Check for integer store 1131 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1132 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1133 1134 // Check for integer load 1135 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1136 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1137 1138 // Check for integer reg-xmm reg copy 1139 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1140 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1141 "no 64 bit integer-float reg moves" ); 1142 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1143 } 1144 // -------------------------------------- 1145 // Check for float reg-reg copy 1146 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1148 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1149 if( cbuf ) { 1150 1151 // Note the mucking with the register encode to compensate for the 0/1 1152 // indexing issue mentioned in a comment in the reg_def sections 1153 // for FPR registers many lines above here. 1154 1155 if( src_first != FPR1L_num ) { 1156 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1157 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1158 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1159 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1160 } else { 1161 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1162 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1163 } 1164 #ifndef PRODUCT 1165 } else if( !do_size ) { 1166 if( size != 0 ) st->print("\n\t"); 1167 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1168 else st->print( "FST %s", Matcher::regName[dst_first]); 1169 #endif 1170 } 1171 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1172 } 1173 1174 // Check for float store 1175 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1176 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1177 } 1178 1179 // Check for float load 1180 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1181 int offset = ra_->reg2offset(src_first); 1182 const char *op_str; 1183 int op; 1184 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1185 op_str = "FLD_D"; 1186 op = 0xDD; 1187 } else { // 32-bit load 1188 op_str = "FLD_S"; 1189 op = 0xD9; 1190 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1191 } 1192 if( cbuf ) { 1193 emit_opcode (*cbuf, op ); 1194 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1195 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1196 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1197 #ifndef PRODUCT 1198 } else if( !do_size ) { 1199 if( size != 0 ) st->print("\n\t"); 1200 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1201 #endif 1202 } 1203 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1204 return size + 3+offset_size+2; 1205 } 1206 1207 // Check for xmm reg-reg copy 1208 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1209 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1210 (src_first+1 == src_second && dst_first+1 == dst_second), 1211 "no non-adjacent float-moves" ); 1212 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1213 } 1214 1215 // Check for xmm reg-integer reg copy 1216 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1217 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1218 "no 64 bit float-integer reg moves" ); 1219 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm store 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1224 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1225 } 1226 1227 // Check for float xmm load 1228 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1229 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1230 } 1231 1232 // Copy from float reg to xmm reg 1233 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1234 
// copy to the top of stack from floating point reg 1235 // and use LEA to preserve flags 1236 if( cbuf ) { 1237 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1238 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1239 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1240 emit_d8(*cbuf,0xF8); 1241 #ifndef PRODUCT 1242 } else if( !do_size ) { 1243 if( size != 0 ) st->print("\n\t"); 1244 st->print("LEA ESP,[ESP-8]"); 1245 #endif 1246 } 1247 size += 4; 1248 1249 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1250 1251 // Copy from the temp memory to the xmm reg. 1252 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1253 1254 if( cbuf ) { 1255 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1256 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1257 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1258 emit_d8(*cbuf,0x08); 1259 #ifndef PRODUCT 1260 } else if( !do_size ) { 1261 if( size != 0 ) st->print("\n\t"); 1262 st->print("LEA ESP,[ESP+8]"); 1263 #endif 1264 } 1265 size += 4; 1266 return size; 1267 } 1268 1269 assert( size > 0, "missed a case" ); 1270 1271 // -------------------------------------------------------------------- 1272 // Check for second bits still needing moving. 
1273 if( src_second == dst_second ) 1274 return size; // Self copy; no move 1275 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1276 1277 // Check for second word int-int move 1278 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1279 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1280 1281 // Check for second word integer store 1282 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1283 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1284 1285 // Check for second word integer load 1286 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1287 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1288 1289 1290 Unimplemented(); 1291 return 0; // Mute compiler 1292 } 1293 1294 #ifndef PRODUCT 1295 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1296 implementation( NULL, ra_, false, st ); 1297 } 1298 #endif 1299 1300 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1301 implementation( &cbuf, ra_, false, NULL ); 1302 } 1303 1304 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1305 return implementation( NULL, ra_, true, NULL ); 1306 } 1307 1308 1309 //============================================================================= 1310 #ifndef PRODUCT 1311 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1312 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1313 int reg = ra_->get_reg_first(this); 1314 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1315 } 1316 #endif 1317 1318 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_encode(this); 1321 if( offset >= 128 ) { 1322 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1323 emit_rm(cbuf, 0x2, reg, 
0x04); 1324 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1325 emit_d32(cbuf, offset); 1326 } 1327 else { 1328 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1329 emit_rm(cbuf, 0x1, reg, 0x04); 1330 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1331 emit_d8(cbuf, offset); 1332 } 1333 } 1334 1335 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1336 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1337 if( offset >= 128 ) { 1338 return 7; 1339 } 1340 else { 1341 return 4; 1342 } 1343 } 1344 1345 //============================================================================= 1346 #ifndef PRODUCT 1347 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1348 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1349 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1350 st->print_cr("\tNOP"); 1351 st->print_cr("\tNOP"); 1352 if( !OptoBreakpoint ) 1353 st->print_cr("\tNOP"); 1354 } 1355 #endif 1356 1357 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1358 MacroAssembler masm(&cbuf); 1359 #ifdef ASSERT 1360 uint insts_size = cbuf.insts_size(); 1361 #endif 1362 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1363 masm.jump_cc(Assembler::notEqual, 1364 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1365 /* WARNING these NOPs are critical so that verified entry point is properly 1366 aligned for patching by NativeJump::patch_verified_entry() */ 1367 int nops_cnt = 2; 1368 if( !OptoBreakpoint ) // Leave space for int3 1369 nops_cnt += 1; 1370 masm.nop(nops_cnt); 1371 1372 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1373 } 1374 1375 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1376 return OptoBreakpoint ? 
11 : 12; 1377 } 1378 1379 1380 //============================================================================= 1381 1382 int Matcher::regnum_to_fpu_offset(int regnum) { 1383 return regnum - 32; // The FP registers are in the second chunk 1384 } 1385 1386 // This is UltraSparc specific, true just means we have fast l2f conversion 1387 const bool Matcher::convL2FSupported(void) { 1388 return true; 1389 } 1390 1391 // Is this branch offset short enough that a short branch can be used? 1392 // 1393 // NOTE: If the platform does not provide any short branch variants, then 1394 // this method should return false for offset 0. 1395 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1396 // The passed offset is relative to address of the branch. 1397 // On 86 a branch displacement is calculated relative to address 1398 // of a next instruction. 1399 offset -= br_size; 1400 1401 // the short version of jmpConUCF2 contains multiple branches, 1402 // making the reach slightly less 1403 if (rule == jmpConUCF2_rule) 1404 return (-126 <= offset && offset <= 125); 1405 return (-128 <= offset && offset <= 127); 1406 } 1407 1408 const bool Matcher::isSimpleConstant64(jlong value) { 1409 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1410 return false; 1411 } 1412 1413 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1414 const bool Matcher::init_array_count_is_in_bytes = false; 1415 1416 // Threshold size for cleararray. 1417 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1418 1419 // Needs 2 CMOV's for longs. 1420 const int Matcher::long_cmove_cost() { return 1; } 1421 1422 // No CMOVF/CMOVD with SSE/SSE2 1423 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1424 1425 // Does the CPU require late expand (see block.cpp for description of late expand)? 
// Does the CPU require late expand (see block.cpp)?  Not on x86.
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow oops/klasses only exist on 64-bit VMs; these queries must never be
// reached on x86_32, hence ShouldNotCallThis().
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of a MachNode selected for implicit null
// checking into its *_win95_safe variant.  'idx' is the input edge that
// holds the (possibly null) base; the loop below walks the node's operands
// to find which operand owns that edge.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                           // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                                // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats conerted to double when stored to stack during deoptimization?
// On x32 it is stored with convertion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL.
// divmodL is not matched on x86_32, so these must never be queried.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x con) with a mask that clears the upper word, and
// long constants whose upper word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66): switches to 16-bit operands.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 via the short B8+rd encoding.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequnce:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1712 // Check for 8-bit immediate, and set sign extend bit in opcode 1713 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1714 emit_opcode(cbuf, $primary | 0x02); } 1715 else { // If 32-bit immediate 1716 emit_opcode(cbuf, $primary); 1717 } 1718 // Emit r/m byte with secondary opcode, after primary opcode. 1719 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1720 %} 1721 1722 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1723 // Check for 8-bit immediate, and set sign extend bit in opcode 1724 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1725 $$$emit8$imm$$constant; 1726 } 1727 else { // If 32-bit immediate 1728 // Output immediate 1729 $$$emit32$imm$$constant; 1730 } 1731 %} 1732 1733 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1734 // Emit primary opcode and set sign-extend bit 1735 // Check for 8-bit immediate, and set sign extend bit in opcode 1736 int con = (int)$imm$$constant; // Throw away top bits 1737 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1738 // Emit r/m byte with secondary opcode, after primary opcode. 1739 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1740 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1741 else emit_d32(cbuf,con); 1742 %} 1743 1744 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1745 // Emit primary opcode and set sign-extend bit 1746 // Check for 8-bit immediate, and set sign extend bit in opcode 1747 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1748 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1749 // Emit r/m byte with tertiary opcode, after primary opcode. 
1750 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1751 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1752 else emit_d32(cbuf,con); 1753 %} 1754 1755 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1756 emit_cc(cbuf, $secondary, $dst$$reg ); 1757 %} 1758 1759 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1760 int destlo = $dst$$reg; 1761 int desthi = HIGH_FROM_LOW(destlo); 1762 // bswap lo 1763 emit_opcode(cbuf, 0x0F); 1764 emit_cc(cbuf, 0xC8, destlo); 1765 // bswap hi 1766 emit_opcode(cbuf, 0x0F); 1767 emit_cc(cbuf, 0xC8, desthi); 1768 // xchg lo and hi 1769 emit_opcode(cbuf, 0x87); 1770 emit_rm(cbuf, 0x3, destlo, desthi); 1771 %} 1772 1773 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1774 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1775 %} 1776 1777 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1778 $$$emit8$primary; 1779 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1780 %} 1781 1782 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1783 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1784 emit_d8(cbuf, op >> 8 ); 1785 emit_d8(cbuf, op & 255); 1786 %} 1787 1788 // emulate a CMOV with a conditional branch around a MOV 1789 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1790 // Invert sense of branch from sense of CMOV 1791 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1792 emit_d8( cbuf, $brOffs$$constant ); 1793 %} 1794 1795 enc_class enc_PartialSubtypeCheck( ) %{ 1796 Register Redi = as_Register(EDI_enc); // result register 1797 Register Reax = as_Register(EAX_enc); // super class 1798 Register Recx = as_Register(ECX_enc); // killed 1799 Register Resi = as_Register(ESI_enc); // sub class 1800 Label miss; 1801 1802 MacroAssembler _masm(&cbuf); 1803 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1804 NULL, &miss, 1805 /*set_cond_codes:*/ true); 1806 if ($primary) { 1807 __ xorptr(Redi, Redi); 1808 } 1809 __ bind(miss); 1810 %} 1811 1812 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1813 
MacroAssembler masm(&cbuf); 1814 int start = masm.offset(); 1815 if (UseSSE >= 2) { 1816 if (VerifyFPU) { 1817 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1818 } 1819 } else { 1820 // External c_calling_convention expects the FPU stack to be 'clean'. 1821 // Compiled code leaves it dirty. Do cleanup now. 1822 masm.empty_FPU_stack(); 1823 } 1824 if (sizeof_FFree_Float_Stack_All == -1) { 1825 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1826 } else { 1827 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1828 } 1829 %} 1830 1831 enc_class Verify_FPU_For_Leaf %{ 1832 if( VerifyFPU ) { 1833 MacroAssembler masm(&cbuf); 1834 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1835 } 1836 %} 1837 1838 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1839 // This is the instruction starting address for relocation info. 1840 cbuf.set_insts_mark(); 1841 $$$emit8$primary; 1842 // CALL directly to the runtime 1843 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1844 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1845 1846 if (UseSSE >= 2) { 1847 MacroAssembler _masm(&cbuf); 1848 BasicType rt = tf()->return_type(); 1849 1850 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1851 // A C runtime call where the return value is unused. In SSE2+ 1852 // mode the result needs to be removed from the FPU stack. It's 1853 // likely that this function call could be removed by the 1854 // optimizer if the C function is a pure function. 
1855 __ ffree(0); 1856 } else if (rt == T_FLOAT) { 1857 __ lea(rsp, Address(rsp, -4)); 1858 __ fstp_s(Address(rsp, 0)); 1859 __ movflt(xmm0, Address(rsp, 0)); 1860 __ lea(rsp, Address(rsp, 4)); 1861 } else if (rt == T_DOUBLE) { 1862 __ lea(rsp, Address(rsp, -8)); 1863 __ fstp_d(Address(rsp, 0)); 1864 __ movdbl(xmm0, Address(rsp, 0)); 1865 __ lea(rsp, Address(rsp, 8)); 1866 } 1867 } 1868 %} 1869 1870 1871 enc_class pre_call_resets %{ 1872 // If method sets FPU control word restore it here 1873 debug_only(int off0 = cbuf.insts_size()); 1874 if (ra_->C->in_24_bit_fp_mode()) { 1875 MacroAssembler _masm(&cbuf); 1876 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1877 } 1878 if (ra_->C->max_vector_size() > 16) { 1879 // Clear upper bits of YMM registers when current compiled code uses 1880 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1881 MacroAssembler _masm(&cbuf); 1882 __ vzeroupper(); 1883 } 1884 debug_only(int off1 = cbuf.insts_size()); 1885 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1886 %} 1887 1888 enc_class post_call_FPU %{ 1889 // If method sets FPU control word do it here also 1890 if (Compile::current()->in_24_bit_fp_mode()) { 1891 MacroAssembler masm(&cbuf); 1892 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1893 } 1894 %} 1895 1896 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1897 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1898 // who we intended to call. 1899 cbuf.set_insts_mark(); 1900 $$$emit8$primary; 1901 1902 if (!_method) { 1903 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1904 runtime_call_Relocation::spec(), 1905 RELOC_IMM32); 1906 } else { 1907 int method_index = resolved_method_index(cbuf); 1908 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1909 : static_call_Relocation::spec(method_index); 1910 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1911 rspec, RELOC_DISP32); 1912 // Emit stubs for static call. 1913 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1914 if (stub == NULL) { 1915 ciEnv::current()->record_failure("CodeCache is full"); 1916 return; 1917 } 1918 } 1919 %} 1920 1921 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1922 MacroAssembler _masm(&cbuf); 1923 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1924 %} 1925 1926 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1927 int disp = in_bytes(Method::from_compiled_offset()); 1928 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1929 1930 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1931 cbuf.set_insts_mark(); 1932 $$$emit8$primary; 1933 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1934 emit_d8(cbuf, disp); // Displacement 1935 1936 %} 1937 1938 // Following encoding is no longer used, but may be restored if calling 1939 // convention changes significantly. 
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_oop_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//               runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

  // Shift/rotate a register by an 8-bit immediate: primary opcode, then a
  // ModRM byte whose reg field carries the $secondary opcode extension,
  // then the imm8 shift count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  // MOV r32,imm32 (0xB8 + register encoding, followed by the raw imm32).
  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same as LdImmI but the base opcode comes from $primary.
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long constant into the low register of the pair.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;   // low word only
    if (src_con == 0) {
      // xor dst, dst  -- shorter encoding than MOV dst,0
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long constant into the high register of the
  // pair (encoding is low encoding + 2; presumably matches HIGH_FROM_LOW --
  // TODO confirm against the register definitions).
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;   // high word only
    if (src_con == 0) {
      // xor dst, dst  -- shorter encoding than MOV dst,0
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low half of a long register pair into an int register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Emit just the ModRM byte for a reg,reg form (opcode emitted elsewhere).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Primary opcode + ModRM on the low halves of two long register pairs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Secondary opcode + ModRM on the high halves of two long register pairs.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // ModRM-only variants (opcode emitted elsewhere) for the low and high
  // halves of a long register pair.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // ModRM pairing an int register with the high half of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  // Emit the raw IEEE-754 bit pattern of a float constant (old FPU path).
  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  // Emit the raw IEEE-754 bit pattern of a float constant (XMM path).
  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute 32-bit memory reference: mod=00, r/m=101 means disp32-only
  // addressing in 32-bit mode.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix, emitted only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  // 8-byte compare-and-exchange; see the note above about the RBX/RCX swap.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 4-byte compare-and-exchange (LOCK CMPXCHG [mem_ptr]).
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF==0 as a 0/1 boolean in 'res'.
  // Note: MOV r32,imm32 does not touch the flags, so the MOV res,0 below
  // does not disturb the condition being tested.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail        -- skips the 5-byte MOV res,1 below
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Generic reg,mem ModRM/SIB/disp emission; relocation type taken from the
  // memory operand (disp may be an oop for static globals).
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // As RegMem, but addresses the high word of a long: uses the high register
  // of the pair and bumps the displacement by 4; such a displacement can
  // never be an oop, hence the assert and relocInfo::none.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift a long left or right by 1..31: a double shift (0x0F $tertiary,
  // SHLD when $tertiary==0xA4, else SHRD) moves bits across the register
  // pair, then a single shift ($primary/$secondary) finishes the other half.
  // r1/r2 pick lo/hi ordering to match the shift direction.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: copy hi into lo, shift lo by
  // (cnt-32) if needed, then SAR hi by 31 so the high word holds the sign.
  // NOTE(review): uses emit_d8 for the shift opcode where
  // move_long_big_shift_clr uses emit_opcode -- both emit one byte;
  // presumably equivalent.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B );          // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) {         // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);                   // shift count 31: fill with sign
  %}

  // Logical shift of a long by 32..63: move one half into the other, shift
  // it by (cnt-32) if needed, then clear the vacated half with XOR.
  // r1/r2 pick lo/hi ordering from $secondary (opcode extension).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B );          // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) {         // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);             // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  // Move one 4-byte half of a double between memory and a register; the
  // half is selected by adding disp_for_half to the memory displacement.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // As RMopc_Mem_no_oop, but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst,[src0+src1]: base register plus constant displacement, no index.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, then a short JL jumps over the 2-byte MOV
  // when dst is already the smaller value.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, then a short JG jumps over the 2-byte MOV
  // when dst is already the larger value.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Store an FPU double register to memory.
  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: NEG r/m32 is 0xF7 with /3 opcode extension.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL r/m8 (0x0F 0x9C): set dst byte to 1 if the last compare was
  // signed-less-than, else 0.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p = (p < q) ? p+y : p  -- SBB materializes the borrow from
  // the SUB as an all-ones/all-zeros mask, which gates y via AND.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Shift a long left by a variable count in ECX.  SHLD/SHL only use the
  // low 5 bits of CL, so for counts >= 32 the halves are swapped first:
  // TEST shift,32; if set, move lo into hi and clear lo (the JE skips
  // those 4 bytes), then the double shift handles the remaining count.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Logical right shift of a long by a variable count in ECX; mirror image
  // of shift_left_long (hi moved into lo, hi cleared, SHRD + SHR).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Arithmetic right shift of a long by a variable count in ECX; like
  // shift_right_long but the high half is refilled with SAR hi,31 (5 bytes
  // skipped by the JE: 2-byte MOV + 3-byte SAR imm8) to preserve the sign.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2401 $$$emit8$primary; 2402 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2403 %} 2404 2405 // Pop argument in FPR0 with FSTP ST(0) 2406 enc_class PopFPU() %{ 2407 emit_opcode( cbuf, 0xDD ); 2408 emit_d8( cbuf, 0xD8 ); 2409 %} 2410 2411 // !!!!! equivalent to Pop_Reg_F 2412 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2413 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2414 emit_d8( cbuf, 0xD8+$dst$$reg ); 2415 %} 2416 2417 enc_class Push_Reg_DPR( regDPR dst ) %{ 2418 emit_opcode( cbuf, 0xD9 ); 2419 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2420 %} 2421 2422 enc_class strictfp_bias1( regDPR dst ) %{ 2423 emit_opcode( cbuf, 0xDB ); // FLD m80real 2424 emit_opcode( cbuf, 0x2D ); 2425 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2426 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2427 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2428 %} 2429 2430 enc_class strictfp_bias2( regDPR dst ) %{ 2431 emit_opcode( cbuf, 0xDB ); // FLD m80real 2432 emit_opcode( cbuf, 0x2D ); 2433 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2434 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2435 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2436 %} 2437 2438 // Special case for moving an integer register to a stack slot. 2439 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2440 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2441 %} 2442 2443 // Special case for moving a register to a stack slot. 
2444 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2445 // Opcode already emitted 2446 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2447 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2448 emit_d32(cbuf, $dst$$disp); // Displacement 2449 %} 2450 2451 // Push the integer in stackSlot 'src' onto FP-stack 2452 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2453 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2454 %} 2455 2456 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2457 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2458 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2459 %} 2460 2461 // Same as Pop_Mem_F except for opcode 2462 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2463 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2464 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2465 %} 2466 2467 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2468 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2469 emit_d8( cbuf, 0xD8+$dst$$reg ); 2470 %} 2471 2472 enc_class Push_Reg_FPR( regFPR dst ) %{ 2473 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2474 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2475 %} 2476 2477 // Push FPU's float to a stack-slot, and pop FPU-stack 2478 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2479 int pop = 0x02; 2480 if ($src$$reg != FPR1L_enc) { 2481 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2482 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2483 pop = 0x03; 2484 } 2485 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2486 %} 2487 2488 // Push FPU's double to a stack-slot, and pop FPU-stack 2489 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2490 int pop = 0x02; 2491 if ($src$$reg != FPR1L_enc) { 2492 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2493 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2494 pop = 0x03; 2495 } 2496 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2497 %} 2498 2499 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2500 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2501 int pop = 0xD0 - 1; // -1 since we skip FLD 2502 if ($src$$reg != FPR1L_enc) { 2503 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2504 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2505 pop = 0xD8; 2506 } 2507 emit_opcode( cbuf, 0xDD ); 2508 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2509 %} 2510 2511 2512 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2513 // load dst in FPR0 2514 emit_opcode( cbuf, 0xD9 ); 2515 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2516 if ($src$$reg != FPR1L_enc) { 2517 // fincstp 2518 emit_opcode (cbuf, 0xD9); 2519 emit_opcode (cbuf, 0xF7); 2520 // swap src with FPR1: 2521 // FXCH FPR1 with src 2522 emit_opcode(cbuf, 0xD9); 2523 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2524 // fdecstp 2525 emit_opcode (cbuf, 0xD9); 2526 emit_opcode (cbuf, 0xF6); 2527 } 2528 %} 2529 2530 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2531 MacroAssembler _masm(&cbuf); 2532 __ subptr(rsp, 8); 2533 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2534 __ fld_d(Address(rsp, 0)); 2535 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2536 __ fld_d(Address(rsp, 0)); 2537 %} 2538 2539 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2540 MacroAssembler _masm(&cbuf); 2541 __ subptr(rsp, 4); 2542 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2543 __ fld_s(Address(rsp, 0)); 2544 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2545 __ fld_s(Address(rsp, 0)); 2546 %} 2547 2548 enc_class Push_ResultD(regD dst) %{ 2549 MacroAssembler _masm(&cbuf); 2550 __ fstp_d(Address(rsp, 0)); 2551 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2552 __ addptr(rsp, 8); 2553 %} 2554 2555 enc_class Push_ResultF(regF dst, immI d8) %{ 2556 MacroAssembler _masm(&cbuf); 2557 __ fstp_s(Address(rsp, 0)); 2558 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2559 __ addptr(rsp, $d8$$constant); 2560 %} 2561 2562 enc_class Push_SrcD(regD src) %{ 2563 MacroAssembler _masm(&cbuf); 2564 __ subptr(rsp, 8); 
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte stack temporary used to shuttle values between
  // the XMM registers and the x87 FPU stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte stack temporary.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double onto the x87 stack via the stack temporary.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate $src into the FPR1 (top-of-stack) position without
  // disturbing the rest of the x87 register stack.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word into EFLAGS and skip the next 5 bytes of
  // code when the parity flag (unordered compare) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Floating-point remainder.  FPREM only performs a partial
  // reduction per step, so loop until the FPU reports completion.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Move the x87 compare outcome into EFLAGS, folding the unordered
  // (NaN) case into the "less than" case by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result below:
  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // Result values actually emitted below:
  // less_result    = -1;
  // greater_result =  1;
  // equal_result   =  0;
  // nan_result     = -1;

  // Convert the FPU condition codes into a three-way integer result in
  // $dst (-1 / 0 / +1, with the unordered/NaN case mapped to -1).
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the long onto the CPU stack, FILD it onto the x87 stack,
  // then pop the two pushed words back off the CPU stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // EDX:EAX = EAX * $src1 (signed), then arithmetic-shift the high
  // half right by ($cnt - 32) to extract the wanted bits.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp    (fold the cross terms into the high half)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Long division via a call into the runtime (SharedRuntime::ldiv).
  // NOTE(review): HIGH_FROM_LOW is applied to the whole PUSH opcode
  // byte; this relies on register pairs having adjacent encodings.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Long remainder via a call into the runtime (SharedRuntime::lrem).
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set ZF iff the long is zero: OR the two halves together in $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare the low halves; only if they are
  // equal fall through to compare the high halves.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP low / SBB high; clobbers $tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero (flags for 0 - $src); clobbers $tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff...
  // smells like Gnu Superoptimizer
  // Two's-complement negate of a 64-bit register pair.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);    // POP EDX
  %}

  // Jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long.  Same trunc-rounding trick as
  // DPR2I_encoding above, with a 64-bit FISTP and a sentinel check on
  // the EDX:EAX result pair.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */   (non-popping form)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);            // FILD64 [mem]
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // FISTP64 into the stack slot
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD64 from the stack slot
    cbuf.set_insts_mark();             // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
3144 // 3145 // S T A C K L A Y O U T Allocators stack-slot number 3146 // | (to get allocators register number 3147 // G Owned by | | v add OptoReg::stack0()) 3148 // r CALLER | | 3149 // o | +--------+ pad to even-align allocators stack-slot 3150 // w V | pad0 | numbers; owned by CALLER 3151 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3152 // h ^ | in | 5 3153 // | | args | 4 Holes in incoming args owned by SELF 3154 // | | | | 3 3155 // | | +--------+ 3156 // V | | old out| Empty on Intel, window on Sparc 3157 // | old |preserve| Must be even aligned. 3158 // | SP-+--------+----> Matcher::_old_SP, even aligned 3159 // | | in | 3 area for Intel ret address 3160 // Owned by |preserve| Empty on Sparc. 3161 // SELF +--------+ 3162 // | | pad2 | 2 pad to align old SP 3163 // | +--------+ 1 3164 // | | locks | 0 3165 // | +--------+----> OptoReg::stack0(), even aligned 3166 // | | pad1 | 11 pad to align new SP 3167 // | +--------+ 3168 // | | | 10 3169 // | | spills | 9 spills 3170 // V | | 8 (pad0 slot for callee) 3171 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3172 // ^ | out | 7 3173 // | | args | 6 Holes in outgoing args owned by CALLEE 3174 // Owned by +--------+ 3175 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3176 // | new |preserve| Must be even-aligned. 3177 // | SP-+--------+----> Matcher::_new_SP, even aligned 3178 // | | | 3179 // 3180 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3181 // known from SELF's arguments and the Java calling convention. 3182 // Region 6-7 is determined per call site. 3183 // Note 2: If the calling convention leaves holes in the incoming argument 3184 // area, those holes are owned by SELF. Holes in the outgoing area 3185 // are owned by the CALLEE. Holes should not be nessecary in the 3186 // incoming area, as the Java calling convention is completely under 3187 // the control of the AD file. 
// Doubles can be sorted and packed to
// avoid holes.  Holes in the outgoing arguments may be necessary for
// varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
// size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the short imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate -1
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: any value that fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3720 constraint(ALLOC_IN_RC(ecx_reg)); 3721 match(reg); 3722 match(rRegI); 3723 3724 format %{ "ECX" %} 3725 interface(REG_INTER); 3726 %} 3727 3728 operand eDXRegI(xRegI reg) %{ 3729 constraint(ALLOC_IN_RC(edx_reg)); 3730 match(reg); 3731 match(rRegI); 3732 3733 format %{ "EDX" %} 3734 interface(REG_INTER); 3735 %} 3736 3737 operand eDIRegI(xRegI reg) %{ 3738 constraint(ALLOC_IN_RC(edi_reg)); 3739 match(reg); 3740 match(rRegI); 3741 3742 format %{ "EDI" %} 3743 interface(REG_INTER); 3744 %} 3745 3746 operand naxRegI() %{ 3747 constraint(ALLOC_IN_RC(nax_reg)); 3748 match(RegI); 3749 match(eCXRegI); 3750 match(eDXRegI); 3751 match(eSIRegI); 3752 match(eDIRegI); 3753 3754 format %{ %} 3755 interface(REG_INTER); 3756 %} 3757 3758 operand nadxRegI() %{ 3759 constraint(ALLOC_IN_RC(nadx_reg)); 3760 match(RegI); 3761 match(eBXRegI); 3762 match(eCXRegI); 3763 match(eSIRegI); 3764 match(eDIRegI); 3765 3766 format %{ %} 3767 interface(REG_INTER); 3768 %} 3769 3770 operand ncxRegI() %{ 3771 constraint(ALLOC_IN_RC(ncx_reg)); 3772 match(RegI); 3773 match(eAXRegI); 3774 match(eDXRegI); 3775 match(eSIRegI); 3776 match(eDIRegI); 3777 3778 format %{ %} 3779 interface(REG_INTER); 3780 %} 3781 3782 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3783 // // 3784 operand eSIRegI(xRegI reg) %{ 3785 constraint(ALLOC_IN_RC(esi_reg)); 3786 match(reg); 3787 match(rRegI); 3788 3789 format %{ "ESI" %} 3790 interface(REG_INTER); 3791 %} 3792 3793 // Pointer Register 3794 operand anyRegP() %{ 3795 constraint(ALLOC_IN_RC(any_reg)); 3796 match(RegP); 3797 match(eAXRegP); 3798 match(eBXRegP); 3799 match(eCXRegP); 3800 match(eDIRegP); 3801 match(eRegP); 3802 3803 format %{ %} 3804 interface(REG_INTER); 3805 %} 3806 3807 operand eRegP() %{ 3808 constraint(ALLOC_IN_RC(int_reg)); 3809 match(RegP); 3810 match(eAXRegP); 3811 match(eBXRegP); 3812 match(eCXRegP); 3813 match(eDIRegP); 3814 3815 format %{ %} 3816 interface(REG_INTER); 3817 %} 3818 3819 // 
On windows95, EBP is not safe to use for implicit null tests. 3820 operand eRegP_no_EBP() %{ 3821 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3822 match(RegP); 3823 match(eAXRegP); 3824 match(eBXRegP); 3825 match(eCXRegP); 3826 match(eDIRegP); 3827 3828 op_cost(100); 3829 format %{ %} 3830 interface(REG_INTER); 3831 %} 3832 3833 operand naxRegP() %{ 3834 constraint(ALLOC_IN_RC(nax_reg)); 3835 match(RegP); 3836 match(eBXRegP); 3837 match(eDXRegP); 3838 match(eCXRegP); 3839 match(eSIRegP); 3840 match(eDIRegP); 3841 3842 format %{ %} 3843 interface(REG_INTER); 3844 %} 3845 3846 operand nabxRegP() %{ 3847 constraint(ALLOC_IN_RC(nabx_reg)); 3848 match(RegP); 3849 match(eCXRegP); 3850 match(eDXRegP); 3851 match(eSIRegP); 3852 match(eDIRegP); 3853 3854 format %{ %} 3855 interface(REG_INTER); 3856 %} 3857 3858 operand pRegP() %{ 3859 constraint(ALLOC_IN_RC(p_reg)); 3860 match(RegP); 3861 match(eBXRegP); 3862 match(eDXRegP); 3863 match(eSIRegP); 3864 match(eDIRegP); 3865 3866 format %{ %} 3867 interface(REG_INTER); 3868 %} 3869 3870 // Special Registers 3871 // Return a pointer value 3872 operand eAXRegP(eRegP reg) %{ 3873 constraint(ALLOC_IN_RC(eax_reg)); 3874 match(reg); 3875 format %{ "EAX" %} 3876 interface(REG_INTER); 3877 %} 3878 3879 // Used in AtomicAdd 3880 operand eBXRegP(eRegP reg) %{ 3881 constraint(ALLOC_IN_RC(ebx_reg)); 3882 match(reg); 3883 format %{ "EBX" %} 3884 interface(REG_INTER); 3885 %} 3886 3887 // Tail-call (interprocedural jump) to interpreter 3888 operand eCXRegP(eRegP reg) %{ 3889 constraint(ALLOC_IN_RC(ecx_reg)); 3890 match(reg); 3891 format %{ "ECX" %} 3892 interface(REG_INTER); 3893 %} 3894 3895 operand eSIRegP(eRegP reg) %{ 3896 constraint(ALLOC_IN_RC(esi_reg)); 3897 match(reg); 3898 format %{ "ESI" %} 3899 interface(REG_INTER); 3900 %} 3901 3902 // Used in rep stosw 3903 operand eDIRegP(eRegP reg) %{ 3904 constraint(ALLOC_IN_RC(edi_reg)); 3905 match(reg); 3906 format %{ "EDI" %} 3907 interface(REG_INTER); 3908 %} 3909 3910 operand eRegL() %{ 
3911 constraint(ALLOC_IN_RC(long_reg)); 3912 match(RegL); 3913 match(eADXRegL); 3914 3915 format %{ %} 3916 interface(REG_INTER); 3917 %} 3918 3919 operand eADXRegL( eRegL reg ) %{ 3920 constraint(ALLOC_IN_RC(eadx_reg)); 3921 match(reg); 3922 3923 format %{ "EDX:EAX" %} 3924 interface(REG_INTER); 3925 %} 3926 3927 operand eBCXRegL( eRegL reg ) %{ 3928 constraint(ALLOC_IN_RC(ebcx_reg)); 3929 match(reg); 3930 3931 format %{ "EBX:ECX" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 // Special case for integer high multiply 3936 operand eADXRegL_low_only() %{ 3937 constraint(ALLOC_IN_RC(eadx_reg)); 3938 match(RegL); 3939 3940 format %{ "EAX" %} 3941 interface(REG_INTER); 3942 %} 3943 3944 // Flags register, used as output of compare instructions 3945 operand eFlagsReg() %{ 3946 constraint(ALLOC_IN_RC(int_flags)); 3947 match(RegFlags); 3948 3949 format %{ "EFLAGS" %} 3950 interface(REG_INTER); 3951 %} 3952 3953 // Flags register, used as output of FLOATING POINT compare instructions 3954 operand eFlagsRegU() %{ 3955 constraint(ALLOC_IN_RC(int_flags)); 3956 match(RegFlags); 3957 3958 format %{ "EFLAGS_U" %} 3959 interface(REG_INTER); 3960 %} 3961 3962 operand eFlagsRegUCF() %{ 3963 constraint(ALLOC_IN_RC(int_flags)); 3964 match(RegFlags); 3965 predicate(false); 3966 3967 format %{ "EFLAGS_U_CF" %} 3968 interface(REG_INTER); 3969 %} 3970 3971 // Condition Code Register used by long compare 3972 operand flagsReg_long_LTGE() %{ 3973 constraint(ALLOC_IN_RC(int_flags)); 3974 match(RegFlags); 3975 format %{ "FLAGS_LTGE" %} 3976 interface(REG_INTER); 3977 %} 3978 operand flagsReg_long_EQNE() %{ 3979 constraint(ALLOC_IN_RC(int_flags)); 3980 match(RegFlags); 3981 format %{ "FLAGS_EQNE" %} 3982 interface(REG_INTER); 3983 %} 3984 operand flagsReg_long_LEGT() %{ 3985 constraint(ALLOC_IN_RC(int_flags)); 3986 match(RegFlags); 3987 format %{ "FLAGS_LEGT" %} 3988 interface(REG_INTER); 3989 %} 3990 3991 // Float register operands 3992 operand regDPR() %{ 3993 predicate( UseSSE < 2 ); 
3994 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3995 match(RegD); 3996 match(regDPR1); 3997 match(regDPR2); 3998 format %{ %} 3999 interface(REG_INTER); 4000 %} 4001 4002 operand regDPR1(regDPR reg) %{ 4003 predicate( UseSSE < 2 ); 4004 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4005 match(reg); 4006 format %{ "FPR1" %} 4007 interface(REG_INTER); 4008 %} 4009 4010 operand regDPR2(regDPR reg) %{ 4011 predicate( UseSSE < 2 ); 4012 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4013 match(reg); 4014 format %{ "FPR2" %} 4015 interface(REG_INTER); 4016 %} 4017 4018 operand regnotDPR1(regDPR reg) %{ 4019 predicate( UseSSE < 2 ); 4020 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4021 match(reg); 4022 format %{ %} 4023 interface(REG_INTER); 4024 %} 4025 4026 // Float register operands 4027 operand regFPR() %{ 4028 predicate( UseSSE < 2 ); 4029 constraint(ALLOC_IN_RC(fp_flt_reg)); 4030 match(RegF); 4031 match(regFPR1); 4032 format %{ %} 4033 interface(REG_INTER); 4034 %} 4035 4036 // Float register operands 4037 operand regFPR1(regFPR reg) %{ 4038 predicate( UseSSE < 2 ); 4039 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4040 match(reg); 4041 format %{ "FPR1" %} 4042 interface(REG_INTER); 4043 %} 4044 4045 // XMM Float register operands 4046 operand regF() %{ 4047 predicate( UseSSE>=1 ); 4048 constraint(ALLOC_IN_RC(float_reg_legacy)); 4049 match(RegF); 4050 format %{ %} 4051 interface(REG_INTER); 4052 %} 4053 4054 // XMM Double register operands 4055 operand regD() %{ 4056 predicate( UseSSE>=2 ); 4057 constraint(ALLOC_IN_RC(double_reg_legacy)); 4058 match(RegD); 4059 format %{ %} 4060 interface(REG_INTER); 4061 %} 4062 4063 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4064 // runtime code generation via reg_class_dynamic. 
// Vector register operands (legacy register classes; see note above)
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand: absolute address (no base register; base(0xFFFFFFFF)
// marks "no base" to the encoder)
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand: [reg], no displacement
// (index(0x4) is the "no index" encoding, since ESP cannot be an index)
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (integer base register plus pointer-constant displacement; note the
// reversed match order: AddP off reg)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support: address pinned to ESI so it cannot overlap the
// destination long pair
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the operands above but exclude EBP as a base and carry a
// high op_cost to discourage use except where required.
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed); encodings are the x86 condition-code nibbles
// used by Jcc/SETcc
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (restricted to lt/ge/le/gt tests; same unsigned encodings as cmpOpU)
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (eq/ne tests only)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move; encodings are FCMOVcc opcode bytes
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares; each entry carries the encoding of
// the reversed condition (less prints "g", greater prints "l", etc.) so the
// operands of the compare can be swapped.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4585 pipeline %{ 4586 4587 //----------ATTRIBUTES--------------------------------------------------------- 4588 attributes %{ 4589 variable_size_instructions; // Fixed size instructions 4590 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4591 instruction_unit_size = 1; // An instruction is 1 bytes long 4592 instruction_fetch_unit_size = 16; // The processor fetches one line 4593 instruction_fetch_units = 1; // of 16 bytes 4594 4595 // List of nop instructions 4596 nops( MachNop ); 4597 %} 4598 4599 //----------RESOURCES---------------------------------------------------------- 4600 // Resources are the functional units available to the machine 4601 4602 // Generic P2/P3 pipeline 4603 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4604 // 3 instructions decoded per cycle. 4605 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4606 // 2 ALU op, only ALU0 handles mul/div instructions. 4607 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4608 MS0, MS1, MEM = MS0 | MS1, 4609 BR, FPU, 4610 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4611 4612 //----------PIPELINE DESCRIPTION----------------------------------------------- 4613 // Pipeline Description specifies the stages in the machine's pipeline 4614 4615 // Generic P2/P3 pipeline 4616 pipe_desc(S0, S1, S2, S3, S4, S5); 4617 4618 //----------PIPELINE CLASSES--------------------------------------------------- 4619 // Pipeline Classes describe the stages in which input and output are 4620 // referenced by the hardware pipeline. 4621 4622 // Naming convention: ialu or fpu 4623 // Then: _reg 4624 // Then: _reg if there is a 2nd register 4625 // Then: _long if it's a pair of instructions implementing a long 4626 // Then: _fat if it requires the big decoder 4627 // Or: _mem if it requires the big decoder and a memory unit. 
4628 4629 // Integer ALU reg operation 4630 pipe_class ialu_reg(rRegI dst) %{ 4631 single_instruction; 4632 dst : S4(write); 4633 dst : S3(read); 4634 DECODE : S0; // any decoder 4635 ALU : S3; // any alu 4636 %} 4637 4638 // Long ALU reg operation 4639 pipe_class ialu_reg_long(eRegL dst) %{ 4640 instruction_count(2); 4641 dst : S4(write); 4642 dst : S3(read); 4643 DECODE : S0(2); // any 2 decoders 4644 ALU : S3(2); // both alus 4645 %} 4646 4647 // Integer ALU reg operation using big decoder 4648 pipe_class ialu_reg_fat(rRegI dst) %{ 4649 single_instruction; 4650 dst : S4(write); 4651 dst : S3(read); 4652 D0 : S0; // big decoder only 4653 ALU : S3; // any alu 4654 %} 4655 4656 // Long ALU reg operation using big decoder 4657 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4658 instruction_count(2); 4659 dst : S4(write); 4660 dst : S3(read); 4661 D0 : S0(2); // big decoder only; twice 4662 ALU : S3(2); // any 2 alus 4663 %} 4664 4665 // Integer ALU reg-reg operation 4666 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4667 single_instruction; 4668 dst : S4(write); 4669 src : S3(read); 4670 DECODE : S0; // any decoder 4671 ALU : S3; // any alu 4672 %} 4673 4674 // Long ALU reg-reg operation 4675 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4676 instruction_count(2); 4677 dst : S4(write); 4678 src : S3(read); 4679 DECODE : S0(2); // any 2 decoders 4680 ALU : S3(2); // both alus 4681 %} 4682 4683 // Integer ALU reg-reg operation 4684 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4685 single_instruction; 4686 dst : S4(write); 4687 src : S3(read); 4688 D0 : S0; // big decoder only 4689 ALU : S3; // any alu 4690 %} 4691 4692 // Long ALU reg-reg operation 4693 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4694 instruction_count(2); 4695 dst : S4(write); 4696 src : S3(read); 4697 D0 : S0(2); // big decoder only; twice 4698 ALU : S3(2); // both alus 4699 %} 4700 4701 // Integer ALU reg-mem operation 4702 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4703 single_instruction; 4704 dst : S5(write); 4705 mem : S3(read); 4706 D0 : S0; // big decoder only 4707 ALU : S4; // any alu 4708 MEM : S3; // any mem 4709 %} 4710 4711 // Long ALU reg-mem operation 4712 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4713 instruction_count(2); 4714 dst : S5(write); 4715 mem : S3(read); 4716 D0 : S0(2); // big decoder only; twice 4717 ALU : S4(2); // any 2 alus 4718 MEM : S3(2); // both mems 4719 %} 4720 4721 // Integer mem operation (prefetch) 4722 pipe_class ialu_mem(memory mem) 4723 %{ 4724 single_instruction; 4725 mem : S3(read); 4726 D0 : S0; // big decoder only 4727 MEM : S3; // any mem 4728 %} 4729 4730 // Integer Store to Memory 4731 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4732 single_instruction; 4733 mem : S3(read); 4734 src : S5(read); 4735 D0 : S0; // big decoder only 4736 ALU : S4; // any alu 4737 MEM : S3; 4738 %} 4739 4740 // Long Store to Memory 4741 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4742 instruction_count(2); 4743 mem : S3(read); 4744 src : S5(read); 4745 D0 : S0(2); // big decoder only; twice 4746 ALU : S4(2); // any 2 alus 4747 MEM : S3(2); // Both mems 4748 %} 4749 4750 // Integer Store to Memory 4751 pipe_class ialu_mem_imm(memory mem) %{ 4752 single_instruction; 4753 mem : S3(read); 4754 D0 : S0; // big decoder only 4755 ALU : S4; // any alu 4756 MEM : S3; 4757 %} 4758 4759 // Integer ALU0 reg-reg operation 4760 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4761 single_instruction; 4762 dst : S4(write); 4763 src : S3(read); 4764 D0 : S0; // Big decoder only 4765 ALU0 : S3; // only alu0 4766 %} 4767 4768 // Integer ALU0 reg-mem operation 4769 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4770 single_instruction; 4771 dst : S5(write); 4772 mem : S3(read); 4773 D0 : S0; // big decoder only 4774 ALU0 : S4; // ALU0 only 4775 MEM : S3; // any mem 4776 %} 4777 4778 // Integer ALU reg-reg operation 4779 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4780 single_instruction; 4781 cr : S4(write); 4782 src1 : S3(read); 4783 src2 : S3(read); 4784 DECODE : S0; // any decoder 4785 ALU : S3; // any alu 4786 %} 4787 4788 // Integer ALU reg-imm operation 4789 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4790 single_instruction; 4791 cr : S4(write); 4792 src1 : S3(read); 4793 DECODE : S0; // any decoder 4794 ALU : S3; // any alu 4795 %} 4796 4797 // Integer ALU reg-mem operation 4798 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4799 single_instruction; 4800 cr : S4(write); 4801 src1 : S3(read); 4802 src2 : S3(read); 4803 D0 : S0; // big decoder only 4804 ALU : S4; // any alu 4805 MEM : S3; 4806 %} 4807 4808 // Conditional move reg-reg 4809 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4810 instruction_count(4); 4811 y : S4(read); 4812 q : S3(read); 4813 p : S3(read); 4814 DECODE : S0(4); // any decoder 4815 %} 4816 4817 // Conditional move reg-reg 4818 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4819 single_instruction; 4820 dst : S4(write); 4821 src : S3(read); 4822 cr : S3(read); 4823 DECODE : S0; // any decoder 4824 %} 4825 4826 // Conditional move reg-mem 4827 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4828 single_instruction; 4829 dst : S4(write); 4830 src : S3(read); 4831 cr : S3(read); 4832 DECODE : S0; // any decoder 4833 MEM : S3; 4834 %} 4835 4836 // Conditional move reg-reg long 4837 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4838 single_instruction; 4839 dst : S4(write); 4840 src : S3(read); 4841 cr : S3(read); 4842 DECODE : S0(2); // any 2 decoders 4843 %} 4844 4845 // Conditional move double reg-reg 4846 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4847 single_instruction; 4848 dst : S4(write); 4849 src : S3(read); 4850 cr : S3(read); 4851 DECODE : S0; // any decoder 4852 %} 4853 4854 // Float reg-reg operation 4855 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);     // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);     // any 4 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);     // any 3 decoders
  D0     : S0;        // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4931 src : S5(read); 4932 mem : S3(read); 4933 DECODE : S0; // any decoder for FPU PUSH 4934 D0 : S1; // big decoder only 4935 FPU : S4; 4936 MEM : S3; // any mem 4937 %} 4938 4939 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4940 instruction_count(3); 4941 src1 : S3(read); 4942 src2 : S3(read); 4943 mem : S3(read); 4944 DECODE : S0(2); // any decoder for FPU PUSH 4945 D0 : S1; // big decoder only 4946 FPU : S4; 4947 MEM : S3; // any mem 4948 %} 4949 4950 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4951 instruction_count(3); 4952 src1 : S3(read); 4953 src2 : S3(read); 4954 mem : S4(read); 4955 DECODE : S0; // any decoder for FPU PUSH 4956 D0 : S0(2); // big decoder only 4957 FPU : S4; 4958 MEM : S3(2); // any mem 4959 %} 4960 4961 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4962 instruction_count(2); 4963 src1 : S3(read); 4964 dst : S4(read); 4965 D0 : S0(2); // big decoder only 4966 MEM : S3(2); // any mem 4967 %} 4968 4969 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4970 instruction_count(3); 4971 src1 : S3(read); 4972 src2 : S3(read); 4973 dst : S4(read); 4974 D0 : S0(3); // big decoder only 4975 FPU : S4; 4976 MEM : S3(3); // any mem 4977 %} 4978 4979 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4980 instruction_count(3); 4981 src1 : S4(read); 4982 mem : S4(read); 4983 DECODE : S0; // any decoder for FPU PUSH 4984 D0 : S0(2); // big decoder only 4985 FPU : S4; 4986 MEM : S3(2); // any mem 4987 %} 4988 4989 // Float load constant 4990 pipe_class fpu_reg_con(regDPR dst) %{ 4991 instruction_count(2); 4992 dst : S5(write); 4993 D0 : S0; // big decoder only for the load 4994 DECODE : S1; // any decoder for FPU POP 4995 FPU : S4; 4996 MEM : S3; // any mem 4997 %} 4998 4999 // Float load constant 5000 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5001 instruction_count(3); 5002 dst : S5(write); 5003 src : S3(read); 5004 D0 : S0; // big decoder only for 
the load 5005 DECODE : S1(2); // any decoder for FPU POP 5006 FPU : S4; 5007 MEM : S3; // any mem 5008 %} 5009 5010 // UnConditional branch 5011 pipe_class pipe_jmp( label labl ) %{ 5012 single_instruction; 5013 BR : S3; 5014 %} 5015 5016 // Conditional branch 5017 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5018 single_instruction; 5019 cr : S1(read); 5020 BR : S3; 5021 %} 5022 5023 // Allocation idiom 5024 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5025 instruction_count(1); force_serialization; 5026 fixed_latency(6); 5027 heap_ptr : S3(read); 5028 DECODE : S0(3); 5029 D0 : S2; 5030 MEM : S3; 5031 ALU : S3(2); 5032 dst : S5(write); 5033 BR : S5; 5034 %} 5035 5036 // Generic big/slow expanded idiom 5037 pipe_class pipe_slow( ) %{ 5038 instruction_count(10); multiple_bundles; force_serialization; 5039 fixed_latency(100); 5040 D0 : S0(2); 5041 MEM : S3(2); 5042 %} 5043 5044 // The real do-nothing guy 5045 pipe_class empty( ) %{ 5046 instruction_count(0); 5047 %} 5048 5049 // Define the class for the Nop node 5050 define %{ 5051 MachNop = empty; 5052 %} 5053 5054 %} 5055 5056 //----------INSTRUCTIONS------------------------------------------------------- 5057 // 5058 // match -- States which machine-independent subtree may be replaced 5059 // by this instruction. 5060 // ins_cost -- The estimated cost of this instruction is used by instruction 5061 // selection to identify a minimum cost tree of machine 5062 // instructions that matches a tree of machine-independent 5063 // instructions. 5064 // format -- A string providing the disassembly for this instruction. 5065 // The value of an instruction's operand may be inserted 5066 // by referring to it with a '$' prefix. 5067 // opcode -- Three instruction opcodes may be provided. These are referred 5068 // to within an encode class as $primary, $secondary, and $tertiary 5069 // respectively. 
The primary opcode is commonly used to 5070 // indicate the type of machine instruction, while secondary 5071 // and tertiary are often used for prefix options or addressing 5072 // modes. 5073 // ins_encode -- A list of encode classes with parameters. The encode class 5074 // name must have been defined in an 'enc_class' specification 5075 // in the encode section of the architecture description. 5076 5077 //----------BSWAP-Instruction-------------------------------------------------- 5078 instruct bytes_reverse_int(rRegI dst) %{ 5079 match(Set dst (ReverseBytesI dst)); 5080 5081 format %{ "BSWAP $dst" %} 5082 opcode(0x0F, 0xC8); 5083 ins_encode( OpcP, OpcSReg(dst) ); 5084 ins_pipe( ialu_reg ); 5085 %} 5086 5087 instruct bytes_reverse_long(eRegL dst) %{ 5088 match(Set dst (ReverseBytesL dst)); 5089 5090 format %{ "BSWAP $dst.lo\n\t" 5091 "BSWAP $dst.hi\n\t" 5092 "XCHG $dst.lo $dst.hi" %} 5093 5094 ins_cost(125); 5095 ins_encode( bswap_long_bytes(dst) ); 5096 ins_pipe( ialu_reg_reg); 5097 %} 5098 5099 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5100 match(Set dst (ReverseBytesUS dst)); 5101 effect(KILL cr); 5102 5103 format %{ "BSWAP $dst\n\t" 5104 "SHR $dst,16\n\t" %} 5105 ins_encode %{ 5106 __ bswapl($dst$$Register); 5107 __ shrl($dst$$Register, 16); 5108 %} 5109 ins_pipe( ialu_reg ); 5110 %} 5111 5112 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5113 match(Set dst (ReverseBytesS dst)); 5114 effect(KILL cr); 5115 5116 format %{ "BSWAP $dst\n\t" 5117 "SAR $dst,16\n\t" %} 5118 ins_encode %{ 5119 __ bswapl($dst$$Register); 5120 __ sarl($dst$$Register, 16); 5121 %} 5122 ins_pipe( ialu_reg ); 5123 %} 5124 5125 5126 //---------- Zeros Count Instructions ------------------------------------------ 5127 5128 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5129 predicate(UseCountLeadingZerosInstruction); 5130 match(Set dst (CountLeadingZerosI src)); 5131 effect(KILL cr); 5132 5133 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5134 ins_encode %{ 5135 __ lzcntl($dst$$Register, $src$$Register); 5136 %} 5137 ins_pipe(ialu_reg); 5138 %} 5139 5140 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5141 predicate(!UseCountLeadingZerosInstruction); 5142 match(Set dst (CountLeadingZerosI src)); 5143 effect(KILL cr); 5144 5145 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5146 "JNZ skip\n\t" 5147 "MOV $dst, -1\n" 5148 "skip:\n\t" 5149 "NEG $dst\n\t" 5150 "ADD $dst, 31" %} 5151 ins_encode %{ 5152 Register Rdst = $dst$$Register; 5153 Register Rsrc = $src$$Register; 5154 Label skip; 5155 __ bsrl(Rdst, Rsrc); 5156 __ jccb(Assembler::notZero, skip); 5157 __ movl(Rdst, -1); 5158 __ bind(skip); 5159 __ negl(Rdst); 5160 __ addl(Rdst, BitsPerInt - 1); 5161 %} 5162 ins_pipe(ialu_reg); 5163 %} 5164 5165 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5166 predicate(UseCountLeadingZerosInstruction); 5167 match(Set dst (CountLeadingZerosL src)); 5168 effect(TEMP dst, KILL cr); 5169 5170 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5171 "JNC done\n\t" 5172 "LZCNT $dst, $src.lo\n\t" 5173 "ADD $dst, 32\n" 5174 "done:" %} 5175 ins_encode %{ 5176 Register Rdst = $dst$$Register; 5177 Register Rsrc = $src$$Register; 5178 Label done; 5179 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5180 __ jccb(Assembler::carryClear, done); 5181 __ lzcntl(Rdst, Rsrc); 5182 __ addl(Rdst, BitsPerInt); 5183 __ bind(done); 5184 %} 5185 ins_pipe(ialu_reg); 5186 %} 5187 5188 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5189 predicate(!UseCountLeadingZerosInstruction); 5190 match(Set dst (CountLeadingZerosL src)); 5191 effect(TEMP dst, KILL cr); 5192 5193 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5194 "JZ msw_is_zero\n\t" 5195 "ADD $dst, 32\n\t" 5196 "JMP not_zero\n" 5197 "msw_is_zero:\n\t" 5198 "BSR $dst, $src.lo\n\t" 5199 "JNZ not_zero\n\t" 5200 "MOV $dst, -1\n" 5201 "not_zero:\n\t" 5202 "NEG 
$dst\n\t" 5203 "ADD $dst, 63\n" %} 5204 ins_encode %{ 5205 Register Rdst = $dst$$Register; 5206 Register Rsrc = $src$$Register; 5207 Label msw_is_zero; 5208 Label not_zero; 5209 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5210 __ jccb(Assembler::zero, msw_is_zero); 5211 __ addl(Rdst, BitsPerInt); 5212 __ jmpb(not_zero); 5213 __ bind(msw_is_zero); 5214 __ bsrl(Rdst, Rsrc); 5215 __ jccb(Assembler::notZero, not_zero); 5216 __ movl(Rdst, -1); 5217 __ bind(not_zero); 5218 __ negl(Rdst); 5219 __ addl(Rdst, BitsPerLong - 1); 5220 %} 5221 ins_pipe(ialu_reg); 5222 %} 5223 5224 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5225 predicate(UseCountTrailingZerosInstruction); 5226 match(Set dst (CountTrailingZerosI src)); 5227 effect(KILL cr); 5228 5229 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5230 ins_encode %{ 5231 __ tzcntl($dst$$Register, $src$$Register); 5232 %} 5233 ins_pipe(ialu_reg); 5234 %} 5235 5236 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5237 predicate(!UseCountTrailingZerosInstruction); 5238 match(Set dst (CountTrailingZerosI src)); 5239 effect(KILL cr); 5240 5241 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5242 "JNZ done\n\t" 5243 "MOV $dst, 32\n" 5244 "done:" %} 5245 ins_encode %{ 5246 Register Rdst = $dst$$Register; 5247 Label done; 5248 __ bsfl(Rdst, $src$$Register); 5249 __ jccb(Assembler::notZero, done); 5250 __ movl(Rdst, BitsPerInt); 5251 __ bind(done); 5252 %} 5253 ins_pipe(ialu_reg); 5254 %} 5255 5256 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5257 predicate(UseCountTrailingZerosInstruction); 5258 match(Set dst (CountTrailingZerosL src)); 5259 effect(TEMP dst, KILL cr); 5260 5261 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5262 "JNC done\n\t" 5263 "TZCNT $dst, $src.hi\n\t" 5264 "ADD $dst, 32\n" 5265 "done:" %} 5266 ins_encode %{ 5267 Register Rdst = $dst$$Register; 5268 Register Rsrc = $src$$Register; 5269 Label done; 5270 __ 
tzcntl(Rdst, Rsrc); 5271 __ jccb(Assembler::carryClear, done); 5272 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5273 __ addl(Rdst, BitsPerInt); 5274 __ bind(done); 5275 %} 5276 ins_pipe(ialu_reg); 5277 %} 5278 5279 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5280 predicate(!UseCountTrailingZerosInstruction); 5281 match(Set dst (CountTrailingZerosL src)); 5282 effect(TEMP dst, KILL cr); 5283 5284 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5285 "JNZ done\n\t" 5286 "BSF $dst, $src.hi\n\t" 5287 "JNZ msw_not_zero\n\t" 5288 "MOV $dst, 32\n" 5289 "msw_not_zero:\n\t" 5290 "ADD $dst, 32\n" 5291 "done:" %} 5292 ins_encode %{ 5293 Register Rdst = $dst$$Register; 5294 Register Rsrc = $src$$Register; 5295 Label msw_not_zero; 5296 Label done; 5297 __ bsfl(Rdst, Rsrc); 5298 __ jccb(Assembler::notZero, done); 5299 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5300 __ jccb(Assembler::notZero, msw_not_zero); 5301 __ movl(Rdst, BitsPerInt); 5302 __ bind(msw_not_zero); 5303 __ addl(Rdst, BitsPerInt); 5304 __ bind(done); 5305 %} 5306 ins_pipe(ialu_reg); 5307 %} 5308 5309 5310 //---------- Population Count Instructions ------------------------------------- 5311 5312 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5313 predicate(UsePopCountInstruction); 5314 match(Set dst (PopCountI src)); 5315 effect(KILL cr); 5316 5317 format %{ "POPCNT $dst, $src" %} 5318 ins_encode %{ 5319 __ popcntl($dst$$Register, $src$$Register); 5320 %} 5321 ins_pipe(ialu_reg); 5322 %} 5323 5324 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5325 predicate(UsePopCountInstruction); 5326 match(Set dst (PopCountI (LoadI mem))); 5327 effect(KILL cr); 5328 5329 format %{ "POPCNT $dst, $mem" %} 5330 ins_encode %{ 5331 __ popcntl($dst$$Register, $mem$$Address); 5332 %} 5333 ins_pipe(ialu_reg); 5334 %} 5335 5336 // Note: Long.bitCount(long) returns an int. 
5337 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5338 predicate(UsePopCountInstruction); 5339 match(Set dst (PopCountL src)); 5340 effect(KILL cr, TEMP tmp, TEMP dst); 5341 5342 format %{ "POPCNT $dst, $src.lo\n\t" 5343 "POPCNT $tmp, $src.hi\n\t" 5344 "ADD $dst, $tmp" %} 5345 ins_encode %{ 5346 __ popcntl($dst$$Register, $src$$Register); 5347 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5348 __ addl($dst$$Register, $tmp$$Register); 5349 %} 5350 ins_pipe(ialu_reg); 5351 %} 5352 5353 // Note: Long.bitCount(long) returns an int. 5354 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5355 predicate(UsePopCountInstruction); 5356 match(Set dst (PopCountL (LoadL mem))); 5357 effect(KILL cr, TEMP tmp, TEMP dst); 5358 5359 format %{ "POPCNT $dst, $mem\n\t" 5360 "POPCNT $tmp, $mem+4\n\t" 5361 "ADD $dst, $tmp" %} 5362 ins_encode %{ 5363 //__ popcntl($dst$$Register, $mem$$Address$$first); 5364 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5365 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5366 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5367 __ addl($dst$$Register, $tmp$$Register); 5368 %} 5369 ins_pipe(ialu_reg); 5370 %} 5371 5372 5373 //----------Load/Store/Move Instructions--------------------------------------- 5374 //----------Load Instructions-------------------------------------------------- 5375 // Load Byte (8bit signed) 5376 instruct loadB(xRegI dst, memory mem) %{ 5377 match(Set dst (LoadB mem)); 5378 5379 ins_cost(125); 5380 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5381 5382 ins_encode %{ 5383 __ movsbl($dst$$Register, $mem$$Address); 5384 %} 5385 5386 ins_pipe(ialu_reg_mem); 5387 %} 5388 5389 // Load Byte (8bit signed) into Long Register 5390 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5391 match(Set dst (ConvI2L (LoadB mem))); 5392 effect(KILL 
cr); 5393 5394 ins_cost(375); 5395 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5396 "MOV $dst.hi,$dst.lo\n\t" 5397 "SAR $dst.hi,7" %} 5398 5399 ins_encode %{ 5400 __ movsbl($dst$$Register, $mem$$Address); 5401 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5402 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5403 %} 5404 5405 ins_pipe(ialu_reg_mem); 5406 %} 5407 5408 // Load Unsigned Byte (8bit UNsigned) 5409 instruct loadUB(xRegI dst, memory mem) %{ 5410 match(Set dst (LoadUB mem)); 5411 5412 ins_cost(125); 5413 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5414 5415 ins_encode %{ 5416 __ movzbl($dst$$Register, $mem$$Address); 5417 %} 5418 5419 ins_pipe(ialu_reg_mem); 5420 %} 5421 5422 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5423 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5424 match(Set dst (ConvI2L (LoadUB mem))); 5425 effect(KILL cr); 5426 5427 ins_cost(250); 5428 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5429 "XOR $dst.hi,$dst.hi" %} 5430 5431 ins_encode %{ 5432 Register Rdst = $dst$$Register; 5433 __ movzbl(Rdst, $mem$$Address); 5434 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5435 %} 5436 5437 ins_pipe(ialu_reg_mem); 5438 %} 5439 5440 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5441 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5442 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5443 effect(KILL cr); 5444 5445 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5446 "XOR $dst.hi,$dst.hi\n\t" 5447 "AND $dst.lo,right_n_bits($mask, 8)" %} 5448 ins_encode %{ 5449 Register Rdst = $dst$$Register; 5450 __ movzbl(Rdst, $mem$$Address); 5451 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5452 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5453 %} 5454 ins_pipe(ialu_reg_mem); 5455 %} 5456 5457 // Load Short (16bit signed) 5458 instruct loadS(rRegI 
dst, memory mem) %{ 5459 match(Set dst (LoadS mem)); 5460 5461 ins_cost(125); 5462 format %{ "MOVSX $dst,$mem\t# short" %} 5463 5464 ins_encode %{ 5465 __ movswl($dst$$Register, $mem$$Address); 5466 %} 5467 5468 ins_pipe(ialu_reg_mem); 5469 %} 5470 5471 // Load Short (16 bit signed) to Byte (8 bit signed) 5472 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5473 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5474 5475 ins_cost(125); 5476 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5477 ins_encode %{ 5478 __ movsbl($dst$$Register, $mem$$Address); 5479 %} 5480 ins_pipe(ialu_reg_mem); 5481 %} 5482 5483 // Load Short (16bit signed) into Long Register 5484 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5485 match(Set dst (ConvI2L (LoadS mem))); 5486 effect(KILL cr); 5487 5488 ins_cost(375); 5489 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5490 "MOV $dst.hi,$dst.lo\n\t" 5491 "SAR $dst.hi,15" %} 5492 5493 ins_encode %{ 5494 __ movswl($dst$$Register, $mem$$Address); 5495 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5496 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5497 %} 5498 5499 ins_pipe(ialu_reg_mem); 5500 %} 5501 5502 // Load Unsigned Short/Char (16bit unsigned) 5503 instruct loadUS(rRegI dst, memory mem) %{ 5504 match(Set dst (LoadUS mem)); 5505 5506 ins_cost(125); 5507 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5508 5509 ins_encode %{ 5510 __ movzwl($dst$$Register, $mem$$Address); 5511 %} 5512 5513 ins_pipe(ialu_reg_mem); 5514 %} 5515 5516 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5517 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5518 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5519 5520 ins_cost(125); 5521 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5522 ins_encode %{ 5523 __ movsbl($dst$$Register, $mem$$Address); 5524 %} 5525 ins_pipe(ialu_reg_mem); 5526 %} 5527 5528 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5529 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5530 match(Set dst (ConvI2L (LoadUS mem))); 5531 effect(KILL cr); 5532 5533 ins_cost(250); 5534 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5535 "XOR $dst.hi,$dst.hi" %} 5536 5537 ins_encode %{ 5538 __ movzwl($dst$$Register, $mem$$Address); 5539 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5540 %} 5541 5542 ins_pipe(ialu_reg_mem); 5543 %} 5544 5545 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5546 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5547 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5548 effect(KILL cr); 5549 5550 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5551 "XOR $dst.hi,$dst.hi" %} 5552 ins_encode %{ 5553 Register Rdst = $dst$$Register; 5554 __ movzbl(Rdst, $mem$$Address); 5555 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5556 %} 5557 ins_pipe(ialu_reg_mem); 5558 %} 5559 5560 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5561 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5562 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5563 effect(KILL cr); 5564 5565 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5566 "XOR $dst.hi,$dst.hi\n\t" 5567 "AND $dst.lo,right_n_bits($mask, 16)" %} 5568 ins_encode %{ 5569 Register Rdst = $dst$$Register; 5570 __ movzwl(Rdst, $mem$$Address); 5571 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5572 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5573 %} 5574 ins_pipe(ialu_reg_mem); 5575 %} 5576 5577 // Load Integer 5578 instruct loadI(rRegI dst, memory mem) %{ 5579 match(Set dst (LoadI mem)); 5580 5581 ins_cost(125); 5582 format %{ "MOV $dst,$mem\t# int" %} 5583 5584 ins_encode %{ 5585 __ movl($dst$$Register, $mem$$Address); 5586 %} 5587 5588 ins_pipe(ialu_reg_mem); 5589 %} 5590 5591 // Load Integer (32 bit signed) to Byte (8 bit signed) 5592 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5593 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5594 5595 ins_cost(125); 5596 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5597 ins_encode %{ 5598 __ movsbl($dst$$Register, $mem$$Address); 5599 %} 5600 ins_pipe(ialu_reg_mem); 5601 %} 5602 5603 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5604 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5605 match(Set dst (AndI (LoadI mem) mask)); 5606 5607 ins_cost(125); 5608 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5609 ins_encode %{ 5610 __ movzbl($dst$$Register, $mem$$Address); 5611 %} 5612 ins_pipe(ialu_reg_mem); 5613 %} 5614 5615 // Load Integer (32 bit signed) to Short (16 bit signed) 5616 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5617 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5618 5619 ins_cost(125); 5620 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5621 ins_encode %{ 5622 __ movswl($dst$$Register, $mem$$Address); 5623 %} 5624 ins_pipe(ialu_reg_mem); 5625 
%} 5626 5627 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5628 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5629 match(Set dst (AndI (LoadI mem) mask)); 5630 5631 ins_cost(125); 5632 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5633 ins_encode %{ 5634 __ movzwl($dst$$Register, $mem$$Address); 5635 %} 5636 ins_pipe(ialu_reg_mem); 5637 %} 5638 5639 // Load Integer into Long Register 5640 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5641 match(Set dst (ConvI2L (LoadI mem))); 5642 effect(KILL cr); 5643 5644 ins_cost(375); 5645 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5646 "MOV $dst.hi,$dst.lo\n\t" 5647 "SAR $dst.hi,31" %} 5648 5649 ins_encode %{ 5650 __ movl($dst$$Register, $mem$$Address); 5651 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5652 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5653 %} 5654 5655 ins_pipe(ialu_reg_mem); 5656 %} 5657 5658 // Load Integer with mask 0xFF into Long Register 5659 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5660 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5661 effect(KILL cr); 5662 5663 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5664 "XOR $dst.hi,$dst.hi" %} 5665 ins_encode %{ 5666 Register Rdst = $dst$$Register; 5667 __ movzbl(Rdst, $mem$$Address); 5668 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5669 %} 5670 ins_pipe(ialu_reg_mem); 5671 %} 5672 5673 // Load Integer with mask 0xFFFF into Long Register 5674 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5675 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5676 effect(KILL cr); 5677 5678 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5679 "XOR $dst.hi,$dst.hi" %} 5680 ins_encode %{ 5681 Register Rdst = $dst$$Register; 5682 __ movzwl(Rdst, $mem$$Address); 5683 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5684 %} 5685 ins_pipe(ialu_reg_mem); 
5686 %} 5687 5688 // Load Integer with 31-bit mask into Long Register 5689 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5690 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5691 effect(KILL cr); 5692 5693 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5694 "XOR $dst.hi,$dst.hi\n\t" 5695 "AND $dst.lo,$mask" %} 5696 ins_encode %{ 5697 Register Rdst = $dst$$Register; 5698 __ movl(Rdst, $mem$$Address); 5699 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5700 __ andl(Rdst, $mask$$constant); 5701 %} 5702 ins_pipe(ialu_reg_mem); 5703 %} 5704 5705 // Load Unsigned Integer into Long Register 5706 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5707 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5708 effect(KILL cr); 5709 5710 ins_cost(250); 5711 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5712 "XOR $dst.hi,$dst.hi" %} 5713 5714 ins_encode %{ 5715 __ movl($dst$$Register, $mem$$Address); 5716 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5717 %} 5718 5719 ins_pipe(ialu_reg_mem); 5720 %} 5721 5722 // Load Long. Cannot clobber address while loading, so restrict address 5723 // register to ESI 5724 instruct loadL(eRegL dst, load_long_memory mem) %{ 5725 predicate(!((LoadLNode*)n)->require_atomic_access()); 5726 match(Set dst (LoadL mem)); 5727 5728 ins_cost(250); 5729 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5730 "MOV $dst.hi,$mem+4" %} 5731 5732 ins_encode %{ 5733 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5734 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5735 __ movl($dst$$Register, Amemlo); 5736 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5737 %} 5738 5739 ins_pipe(ialu_reg_long_mem); 5740 %} 5741 5742 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5743 // then store it down to the stack and reload on the int 5744 // side. 
5745 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5746 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5747 match(Set dst (LoadL mem)); 5748 5749 ins_cost(200); 5750 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5751 "FISTp $dst" %} 5752 ins_encode(enc_loadL_volatile(mem,dst)); 5753 ins_pipe( fpu_reg_mem ); 5754 %} 5755 5756 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5757 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5758 match(Set dst (LoadL mem)); 5759 effect(TEMP tmp); 5760 ins_cost(180); 5761 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5762 "MOVSD $dst,$tmp" %} 5763 ins_encode %{ 5764 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5765 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5771 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5772 match(Set dst (LoadL mem)); 5773 effect(TEMP tmp); 5774 ins_cost(160); 5775 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5776 "MOVD $dst.lo,$tmp\n\t" 5777 "PSRLQ $tmp,32\n\t" 5778 "MOVD $dst.hi,$tmp" %} 5779 ins_encode %{ 5780 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5781 __ movdl($dst$$Register, $tmp$$XMMRegister); 5782 __ psrlq($tmp$$XMMRegister, 32); 5783 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5784 %} 5785 ins_pipe( pipe_slow ); 5786 %} 5787 5788 // Load Range 5789 instruct loadRange(rRegI dst, memory mem) %{ 5790 match(Set dst (LoadRange mem)); 5791 5792 ins_cost(125); 5793 format %{ "MOV $dst,$mem" %} 5794 opcode(0x8B); 5795 ins_encode( OpcP, RegMem(dst,mem)); 5796 ins_pipe( ialu_reg_mem ); 5797 %} 5798 5799 5800 // Load Pointer 5801 instruct loadP(eRegP dst, memory mem) %{ 5802 match(Set dst (LoadP mem)); 5803 5804 ins_cost(125); 5805 format %{ "MOV $dst,$mem" %} 5806 opcode(0x8B); 5807 ins_encode( OpcP, RegMem(dst,mem)); 5808 ins_pipe( 
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 form, used when SSE2 is not available)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Counterpart of loadD for !UseXmmLoadAndClearUpper; the macro
// assembler's movdbl() selects the actual instruction form.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 form, used only when SSE is completely disabled)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one variant per addressing-mode operand
// so the matcher can pick the cheapest form.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR of the register with itself is shorter
// than a MOV immediate, but it clobbers the flags (hence KILL cr).
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Long Constant: two 32-bit immediate moves, one per half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load Long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
6029 instruct loadConF(regF dst, immF con) %{ 6030 match(Set dst con); 6031 ins_cost(125); 6032 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6033 ins_encode %{ 6034 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6035 %} 6036 ins_pipe(pipe_slow); 6037 %} 6038 6039 // The instruction usage is guarded by predicate in operand immF0(). 6040 instruct loadConF0(regF dst, immF0 src) %{ 6041 match(Set dst src); 6042 ins_cost(100); 6043 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6044 ins_encode %{ 6045 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6046 %} 6047 ins_pipe(pipe_slow); 6048 %} 6049 6050 // The instruction usage is guarded by predicate in operand immDPR(). 6051 instruct loadConDPR(regDPR dst, immDPR con) %{ 6052 match(Set dst con); 6053 ins_cost(125); 6054 6055 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6056 "FSTP $dst" %} 6057 ins_encode %{ 6058 __ fld_d($constantaddress($con)); 6059 __ fstp_d($dst$$reg); 6060 %} 6061 ins_pipe(fpu_reg_con); 6062 %} 6063 6064 // The instruction usage is guarded by predicate in operand immDPR0(). 6065 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6066 match(Set dst con); 6067 ins_cost(125); 6068 6069 format %{ "FLDZ ST\n\t" 6070 "FSTP $dst" %} 6071 ins_encode %{ 6072 __ fldz(); 6073 __ fstp_d($dst$$reg); 6074 %} 6075 ins_pipe(fpu_reg_con); 6076 %} 6077 6078 // The instruction usage is guarded by predicate in operand immDPR1(). 6079 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6080 match(Set dst con); 6081 ins_cost(125); 6082 6083 format %{ "FLD1 ST\n\t" 6084 "FSTP $dst" %} 6085 ins_encode %{ 6086 __ fld1(); 6087 __ fstp_d($dst$$reg); 6088 %} 6089 ins_pipe(fpu_reg_con); 6090 %} 6091 6092 // The instruction usage is guarded by predicate in operand immD(). 
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot: two 32-bit loads, low then high half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One variant per AllocatePrefetchInstr setting; predicates keep them
// mutually exclusive.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (operand-size prefix 0x66 selects 16-bit store)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long: two 32-bit stores; only legal when the store need not
// be atomic (see the _volatile variants below for the atomic case).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low half is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: a single 64-bit MOVSD is atomic, bounced through an
// XMM temp; the CMP probes the address for the implicit null check.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above but the source is in a GPR pair: both halves are moved to
// XMM registers and merged with PUNPCKLDQ before the atomic store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; source must already be on the FPU stack top)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87; source must already be on the FPU stack top)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// x86's strong memory model makes most of these empty encodings; only
// the StoreLoad (volatile) barrier needs a real instruction.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A volatile barrier directly following a store to the same location
// is redundant; the matcher proves this via post_store_load_barrier().
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Both operands are constrained to EAX, so the cast is a no-op with an
// empty encoding.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move -- emulated with a branch when CMOV is unavailable.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1: single-byte INC when UseIncDec allows it.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: leaves the flags untouched.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1: single-byte DEC when UseIncDec allows it.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src
%} 7095 opcode(0x03); 7096 ins_encode( OpcP, RegMem( dst, src) ); 7097 ins_pipe( ialu_reg_mem ); 7098 %} 7099 7100 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7101 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7102 effect(KILL cr); 7103 7104 ins_cost(150); 7105 format %{ "ADD $dst,$src" %} 7106 opcode(0x01); /* Opcode 01 /r */ 7107 ins_encode( OpcP, RegMem( src, dst ) ); 7108 ins_pipe( ialu_mem_reg ); 7109 %} 7110 7111 // Add Memory with Immediate 7112 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7113 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7114 effect(KILL cr); 7115 7116 ins_cost(125); 7117 format %{ "ADD $dst,$src" %} 7118 opcode(0x81); /* Opcode 81 /0 id */ 7119 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7120 ins_pipe( ialu_mem_imm ); 7121 %} 7122 7123 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7124 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7125 effect(KILL cr); 7126 7127 ins_cost(125); 7128 format %{ "INC $dst" %} 7129 opcode(0xFF); /* Opcode FF /0 */ 7130 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7131 ins_pipe( ialu_mem_imm ); 7132 %} 7133 7134 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7135 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7136 effect(KILL cr); 7137 7138 ins_cost(125); 7139 format %{ "DEC $dst" %} 7140 opcode(0xFF); /* Opcode FF /1 */ 7141 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7142 ins_pipe( ialu_mem_imm ); 7143 %} 7144 7145 7146 instruct checkCastPP( eRegP dst ) %{ 7147 match(Set dst (CheckCastPP dst)); 7148 7149 size(0); 7150 format %{ "#checkcastPP of $dst" %} 7151 ins_encode( /*empty encoding*/ ); 7152 ins_pipe( empty ); 7153 %} 7154 7155 instruct castPP( eRegP dst ) %{ 7156 match(Set dst (CastPP dst)); 7157 format %{ "#castPP of $dst" %} 7158 ins_encode( /*empty encoding*/ ); 7159 ins_pipe( empty ); 7160 %} 7161 7162 instruct castII( rRegI dst ) %{ 7163 match(Set dst (CastII dst)); 7164 format %{ "#castII of $dst" %} 
7165 ins_encode( /*empty encoding*/ ); 7166 ins_cost(0); 7167 ins_pipe( empty ); 7168 %} 7169 7170 7171 // Load-locked - same as a regular pointer load when used with compare-swap 7172 instruct loadPLocked(eRegP dst, memory mem) %{ 7173 match(Set dst (LoadPLocked mem)); 7174 7175 ins_cost(125); 7176 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7177 opcode(0x8B); 7178 ins_encode( OpcP, RegMem(dst,mem)); 7179 ins_pipe( ialu_reg_mem ); 7180 %} 7181 7182 // Conditional-store of the updated heap-top. 7183 // Used during allocation of the shared heap. 7184 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7185 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7186 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7187 // EAX is killed if there is contention, but then it's also unused. 7188 // In the common case of no contention, EAX holds the new oop address. 7189 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7190 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7191 ins_pipe( pipe_cmpxchg ); 7192 %} 7193 7194 // Conditional-store of an int value. 7195 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7196 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7197 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7198 effect(KILL oldval); 7199 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7200 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7201 ins_pipe( pipe_cmpxchg ); 7202 %} 7203 7204 // Conditional-store of a long value. 7205 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7206 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7207 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7208 effect(KILL oldval); 7209 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7210 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7211 "XCHG EBX,ECX" 7212 %} 7213 ins_encode %{ 7214 // Note: we need to swap rbx, and rcx before and after the 7215 // cmpxchg8 instruction because the instruction uses 7216 // rcx as the high order word of the new value to store but 7217 // our register encoding uses rbx. 7218 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7219 if( os::is_MP() ) 7220 __ lock(); 7221 __ cmpxchg8($mem$$Address); 7222 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7223 %} 7224 ins_pipe( pipe_cmpxchg ); 7225 %} 7226 7227 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7228 7229 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7230 predicate(VM_Version::supports_cx8()); 7231 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7232 effect(KILL cr, KILL oldval); 7233 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7234 "MOV $res,0\n\t" 7235 "JNE,s fail\n\t" 7236 "MOV $res,1\n" 7237 "fail:" %} 7238 ins_encode( enc_cmpxchg8(mem_ptr), 7239 enc_flags_ne_to_boolean(res) ); 7240 ins_pipe( pipe_cmpxchg ); 7241 %} 7242 7243 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7244 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7245 effect(KILL cr, KILL oldval); 7246 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7247 "MOV $res,0\n\t" 7248 "JNE,s fail\n\t" 7249 "MOV $res,1\n" 7250 "fail:" %} 7251 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7252 
ins_pipe( pipe_cmpxchg ); 7253 %} 7254 7255 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7256 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7257 effect(KILL cr, KILL oldval); 7258 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7259 "MOV $res,0\n\t" 7260 "JNE,s fail\n\t" 7261 "MOV $res,1\n" 7262 "fail:" %} 7263 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7264 ins_pipe( pipe_cmpxchg ); 7265 %} 7266 7267 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7268 predicate(n->as_LoadStore()->result_not_used()); 7269 match(Set dummy (GetAndAddI mem add)); 7270 effect(KILL cr); 7271 format %{ "ADDL [$mem],$add" %} 7272 ins_encode %{ 7273 if (os::is_MP()) { __ lock(); } 7274 __ addl($mem$$Address, $add$$constant); 7275 %} 7276 ins_pipe( pipe_cmpxchg ); 7277 %} 7278 7279 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7280 match(Set newval (GetAndAddI mem newval)); 7281 effect(KILL cr); 7282 format %{ "XADDL [$mem],$newval" %} 7283 ins_encode %{ 7284 if (os::is_MP()) { __ lock(); } 7285 __ xaddl($mem$$Address, $newval$$Register); 7286 %} 7287 ins_pipe( pipe_cmpxchg ); 7288 %} 7289 7290 instruct xchgI( memory mem, rRegI newval) %{ 7291 match(Set newval (GetAndSetI mem newval)); 7292 format %{ "XCHGL $newval,[$mem]" %} 7293 ins_encode %{ 7294 __ xchgl($newval$$Register, $mem$$Address); 7295 %} 7296 ins_pipe( pipe_cmpxchg ); 7297 %} 7298 7299 instruct xchgP( memory mem, pRegP newval) %{ 7300 match(Set newval (GetAndSetP mem newval)); 7301 format %{ "XCHGL $newval,[$mem]" %} 7302 ins_encode %{ 7303 __ xchgl($newval$$Register, $mem$$Address); 7304 %} 7305 ins_pipe( pipe_cmpxchg ); 7306 %} 7307 7308 //----------Subtraction Instructions------------------------------------------- 7309 7310 // Integer Subtraction Instructions 7311 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7312 
match(Set dst (SubI dst src)); 7313 effect(KILL cr); 7314 7315 size(2); 7316 format %{ "SUB $dst,$src" %} 7317 opcode(0x2B); 7318 ins_encode( OpcP, RegReg( dst, src) ); 7319 ins_pipe( ialu_reg_reg ); 7320 %} 7321 7322 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7323 match(Set dst (SubI dst src)); 7324 effect(KILL cr); 7325 7326 format %{ "SUB $dst,$src" %} 7327 opcode(0x81,0x05); /* Opcode 81 /5 */ 7328 // ins_encode( RegImm( dst, src) ); 7329 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7330 ins_pipe( ialu_reg ); 7331 %} 7332 7333 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7334 match(Set dst (SubI dst (LoadI src))); 7335 effect(KILL cr); 7336 7337 ins_cost(125); 7338 format %{ "SUB $dst,$src" %} 7339 opcode(0x2B); 7340 ins_encode( OpcP, RegMem( dst, src) ); 7341 ins_pipe( ialu_reg_mem ); 7342 %} 7343 7344 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7345 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7346 effect(KILL cr); 7347 7348 ins_cost(150); 7349 format %{ "SUB $dst,$src" %} 7350 opcode(0x29); /* Opcode 29 /r */ 7351 ins_encode( OpcP, RegMem( src, dst ) ); 7352 ins_pipe( ialu_mem_reg ); 7353 %} 7354 7355 // Subtract from a pointer 7356 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7357 match(Set dst (AddP dst (SubI zero src))); 7358 effect(KILL cr); 7359 7360 size(2); 7361 format %{ "SUB $dst,$src" %} 7362 opcode(0x2B); 7363 ins_encode( OpcP, RegReg( dst, src) ); 7364 ins_pipe( ialu_reg_reg ); 7365 %} 7366 7367 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7368 match(Set dst (SubI zero dst)); 7369 effect(KILL cr); 7370 7371 size(2); 7372 format %{ "NEG $dst" %} 7373 opcode(0xF7,0x03); // Opcode F7 /3 7374 ins_encode( OpcP, RegOpc( dst ) ); 7375 ins_pipe( ialu_reg ); 7376 %} 7377 7378 //----------Multiplication/Division Instructions------------------------------- 7379 // Integer Multiplication Instructions 7380 // Multiply Register 7381 instruct 
mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7382 match(Set dst (MulI dst src)); 7383 effect(KILL cr); 7384 7385 size(3); 7386 ins_cost(300); 7387 format %{ "IMUL $dst,$src" %} 7388 opcode(0xAF, 0x0F); 7389 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7390 ins_pipe( ialu_reg_reg_alu0 ); 7391 %} 7392 7393 // Multiply 32-bit Immediate 7394 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7395 match(Set dst (MulI src imm)); 7396 effect(KILL cr); 7397 7398 ins_cost(300); 7399 format %{ "IMUL $dst,$src,$imm" %} 7400 opcode(0x69); /* 69 /r id */ 7401 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7402 ins_pipe( ialu_reg_reg_alu0 ); 7403 %} 7404 7405 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7406 match(Set dst src); 7407 effect(KILL cr); 7408 7409 // Note that this is artificially increased to make it more expensive than loadConL 7410 ins_cost(250); 7411 format %{ "MOV EAX,$src\t// low word only" %} 7412 opcode(0xB8); 7413 ins_encode( LdImmL_Lo(dst, src) ); 7414 ins_pipe( ialu_reg_fat ); 7415 %} 7416 7417 // Multiply by 32-bit Immediate, taking the shifted high order results 7418 // (special case for shift by 32) 7419 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7420 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7421 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7422 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7423 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7424 effect(USE src1, KILL cr); 7425 7426 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7427 ins_cost(0*100 + 1*400 - 150); 7428 format %{ "IMUL EDX:EAX,$src1" %} 7429 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7430 ins_pipe( pipe_slow ); 7431 %} 7432 7433 // Multiply 
by 32-bit Immediate, taking the shifted high order results 7434 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7435 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7436 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7437 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7438 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7439 effect(USE src1, KILL cr); 7440 7441 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7442 ins_cost(1*100 + 1*400 - 150); 7443 format %{ "IMUL EDX:EAX,$src1\n\t" 7444 "SAR EDX,$cnt-32" %} 7445 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7446 ins_pipe( pipe_slow ); 7447 %} 7448 7449 // Multiply Memory 32-bit Immediate 7450 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7451 match(Set dst (MulI (LoadI src) imm)); 7452 effect(KILL cr); 7453 7454 ins_cost(300); 7455 format %{ "IMUL $dst,$src,$imm" %} 7456 opcode(0x69); /* 69 /r id */ 7457 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7458 ins_pipe( ialu_reg_mem_alu0 ); 7459 %} 7460 7461 // Multiply Memory 7462 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7463 match(Set dst (MulI dst (LoadI src))); 7464 effect(KILL cr); 7465 7466 ins_cost(350); 7467 format %{ "IMUL $dst,$src" %} 7468 opcode(0xAF, 0x0F); 7469 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7470 ins_pipe( ialu_reg_mem_alu0 ); 7471 %} 7472 7473 // Multiply Register Int to Long 7474 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7475 // Basic Idea: long = (long)int * (long)int 7476 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7477 effect(DEF dst, USE src, USE src1, KILL flags); 7478 7479 ins_cost(300); 7480 format %{ "IMUL $dst,$src1" %} 7481 7482 ins_encode( long_int_multiply( dst, src1 
) ); 7483 ins_pipe( ialu_reg_reg_alu0 ); 7484 %} 7485 7486 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7487 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7488 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7489 effect(KILL flags); 7490 7491 ins_cost(300); 7492 format %{ "MUL $dst,$src1" %} 7493 7494 ins_encode( long_uint_multiply(dst, src1) ); 7495 ins_pipe( ialu_reg_reg_alu0 ); 7496 %} 7497 7498 // Multiply Register Long 7499 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7500 match(Set dst (MulL dst src)); 7501 effect(KILL cr, TEMP tmp); 7502 ins_cost(4*100+3*400); 7503 // Basic idea: lo(result) = lo(x_lo * y_lo) 7504 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7505 format %{ "MOV $tmp,$src.lo\n\t" 7506 "IMUL $tmp,EDX\n\t" 7507 "MOV EDX,$src.hi\n\t" 7508 "IMUL EDX,EAX\n\t" 7509 "ADD $tmp,EDX\n\t" 7510 "MUL EDX:EAX,$src.lo\n\t" 7511 "ADD EDX,$tmp" %} 7512 ins_encode( long_multiply( dst, src, tmp ) ); 7513 ins_pipe( pipe_slow ); 7514 %} 7515 7516 // Multiply Register Long where the left operand's high 32 bits are zero 7517 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7518 predicate(is_operand_hi32_zero(n->in(1))); 7519 match(Set dst (MulL dst src)); 7520 effect(KILL cr, TEMP tmp); 7521 ins_cost(2*100+2*400); 7522 // Basic idea: lo(result) = lo(x_lo * y_lo) 7523 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7524 format %{ "MOV $tmp,$src.hi\n\t" 7525 "IMUL $tmp,EAX\n\t" 7526 "MUL EDX:EAX,$src.lo\n\t" 7527 "ADD EDX,$tmp" %} 7528 ins_encode %{ 7529 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7530 __ imull($tmp$$Register, rax); 7531 __ mull($src$$Register); 7532 __ addl(rdx, $tmp$$Register); 7533 %} 7534 ins_pipe( pipe_slow ); 7535 %} 7536 7537 // Multiply Register Long where the right operand's high 32 bits are zero 7538 instruct 
mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7539 predicate(is_operand_hi32_zero(n->in(2))); 7540 match(Set dst (MulL dst src)); 7541 effect(KILL cr, TEMP tmp); 7542 ins_cost(2*100+2*400); 7543 // Basic idea: lo(result) = lo(x_lo * y_lo) 7544 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7545 format %{ "MOV $tmp,$src.lo\n\t" 7546 "IMUL $tmp,EDX\n\t" 7547 "MUL EDX:EAX,$src.lo\n\t" 7548 "ADD EDX,$tmp" %} 7549 ins_encode %{ 7550 __ movl($tmp$$Register, $src$$Register); 7551 __ imull($tmp$$Register, rdx); 7552 __ mull($src$$Register); 7553 __ addl(rdx, $tmp$$Register); 7554 %} 7555 ins_pipe( pipe_slow ); 7556 %} 7557 7558 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7559 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7560 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7561 match(Set dst (MulL dst src)); 7562 effect(KILL cr); 7563 ins_cost(1*400); 7564 // Basic idea: lo(result) = lo(x_lo * y_lo) 7565 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7566 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7567 ins_encode %{ 7568 __ mull($src$$Register); 7569 %} 7570 ins_pipe( pipe_slow ); 7571 %} 7572 7573 // Multiply Register Long by small constant 7574 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7575 match(Set dst (MulL dst src)); 7576 effect(KILL cr, TEMP tmp); 7577 ins_cost(2*100+2*400); 7578 size(12); 7579 // Basic idea: lo(result) = lo(src * EAX) 7580 // hi(result) = hi(src * EAX) + lo(src * EDX) 7581 format %{ "IMUL $tmp,EDX,$src\n\t" 7582 "MOV EDX,$src\n\t" 7583 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7584 "ADD EDX,$tmp" %} 7585 ins_encode( long_multiply_con( dst, src, tmp ) ); 7586 ins_pipe( pipe_slow ); 7587 %} 7588 7589 // Integer DIV with Register 7590 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) 
%{ 7591 match(Set rax (DivI rax div)); 7592 effect(KILL rdx, KILL cr); 7593 size(26); 7594 ins_cost(30*100+10*100); 7595 format %{ "CMP EAX,0x80000000\n\t" 7596 "JNE,s normal\n\t" 7597 "XOR EDX,EDX\n\t" 7598 "CMP ECX,-1\n\t" 7599 "JE,s done\n" 7600 "normal: CDQ\n\t" 7601 "IDIV $div\n\t" 7602 "done:" %} 7603 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7604 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7605 ins_pipe( ialu_reg_reg_alu0 ); 7606 %} 7607 7608 // Divide Register Long 7609 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7610 match(Set dst (DivL src1 src2)); 7611 effect( KILL cr, KILL cx, KILL bx ); 7612 ins_cost(10000); 7613 format %{ "PUSH $src1.hi\n\t" 7614 "PUSH $src1.lo\n\t" 7615 "PUSH $src2.hi\n\t" 7616 "PUSH $src2.lo\n\t" 7617 "CALL SharedRuntime::ldiv\n\t" 7618 "ADD ESP,16" %} 7619 ins_encode( long_div(src1,src2) ); 7620 ins_pipe( pipe_slow ); 7621 %} 7622 7623 // Integer DIVMOD with Register, both quotient and mod results 7624 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7625 match(DivModI rax div); 7626 effect(KILL cr); 7627 size(26); 7628 ins_cost(30*100+10*100); 7629 format %{ "CMP EAX,0x80000000\n\t" 7630 "JNE,s normal\n\t" 7631 "XOR EDX,EDX\n\t" 7632 "CMP ECX,-1\n\t" 7633 "JE,s done\n" 7634 "normal: CDQ\n\t" 7635 "IDIV $div\n\t" 7636 "done:" %} 7637 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7638 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7639 ins_pipe( pipe_slow ); 7640 %} 7641 7642 // Integer MOD with Register 7643 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7644 match(Set rdx (ModI rax div)); 7645 effect(KILL rax, KILL cr); 7646 7647 size(26); 7648 ins_cost(300); 7649 format %{ "CDQ\n\t" 7650 "IDIV $div" %} 7651 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7652 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7653 ins_pipe( ialu_reg_reg_alu0 ); 7654 %} 7655 7656 // Remainder Register Long 7657 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL 
src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7658 match(Set dst (ModL src1 src2)); 7659 effect( KILL cr, KILL cx, KILL bx ); 7660 ins_cost(10000); 7661 format %{ "PUSH $src1.hi\n\t" 7662 "PUSH $src1.lo\n\t" 7663 "PUSH $src2.hi\n\t" 7664 "PUSH $src2.lo\n\t" 7665 "CALL SharedRuntime::lrem\n\t" 7666 "ADD ESP,16" %} 7667 ins_encode( long_mod(src1,src2) ); 7668 ins_pipe( pipe_slow ); 7669 %} 7670 7671 // Divide Register Long (no special case since divisor != -1) 7672 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7673 match(Set dst (DivL dst imm)); 7674 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7675 ins_cost(1000); 7676 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7677 "XOR $tmp2,$tmp2\n\t" 7678 "CMP $tmp,EDX\n\t" 7679 "JA,s fast\n\t" 7680 "MOV $tmp2,EAX\n\t" 7681 "MOV EAX,EDX\n\t" 7682 "MOV EDX,0\n\t" 7683 "JLE,s pos\n\t" 7684 "LNEG EAX : $tmp2\n\t" 7685 "DIV $tmp # unsigned division\n\t" 7686 "XCHG EAX,$tmp2\n\t" 7687 "DIV $tmp\n\t" 7688 "LNEG $tmp2 : EAX\n\t" 7689 "JMP,s done\n" 7690 "pos:\n\t" 7691 "DIV $tmp\n\t" 7692 "XCHG EAX,$tmp2\n" 7693 "fast:\n\t" 7694 "DIV $tmp\n" 7695 "done:\n\t" 7696 "MOV EDX,$tmp2\n\t" 7697 "NEG EDX:EAX # if $imm < 0" %} 7698 ins_encode %{ 7699 int con = (int)$imm$$constant; 7700 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7701 int pcon = (con > 0) ? con : -con; 7702 Label Lfast, Lpos, Ldone; 7703 7704 __ movl($tmp$$Register, pcon); 7705 __ xorl($tmp2$$Register,$tmp2$$Register); 7706 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7707 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7708 7709 __ movl($tmp2$$Register, $dst$$Register); // save 7710 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7711 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7712 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7713 7714 // Negative dividend. 
7715 // convert value to positive to use unsigned division 7716 __ lneg($dst$$Register, $tmp2$$Register); 7717 __ divl($tmp$$Register); 7718 __ xchgl($dst$$Register, $tmp2$$Register); 7719 __ divl($tmp$$Register); 7720 // revert result back to negative 7721 __ lneg($tmp2$$Register, $dst$$Register); 7722 __ jmpb(Ldone); 7723 7724 __ bind(Lpos); 7725 __ divl($tmp$$Register); // Use unsigned division 7726 __ xchgl($dst$$Register, $tmp2$$Register); 7727 // Fallthrow for final divide, tmp2 has 32 bit hi result 7728 7729 __ bind(Lfast); 7730 // fast path: src is positive 7731 __ divl($tmp$$Register); // Use unsigned division 7732 7733 __ bind(Ldone); 7734 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7735 if (con < 0) { 7736 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7737 } 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 // Remainder Register Long (remainder fit into 32 bits) 7743 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7744 match(Set dst (ModL dst imm)); 7745 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7746 ins_cost(1000); 7747 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7748 "CMP $tmp,EDX\n\t" 7749 "JA,s fast\n\t" 7750 "MOV $tmp2,EAX\n\t" 7751 "MOV EAX,EDX\n\t" 7752 "MOV EDX,0\n\t" 7753 "JLE,s pos\n\t" 7754 "LNEG EAX : $tmp2\n\t" 7755 "DIV $tmp # unsigned division\n\t" 7756 "MOV EAX,$tmp2\n\t" 7757 "DIV $tmp\n\t" 7758 "NEG EDX\n\t" 7759 "JMP,s done\n" 7760 "pos:\n\t" 7761 "DIV $tmp\n\t" 7762 "MOV EAX,$tmp2\n" 7763 "fast:\n\t" 7764 "DIV $tmp\n" 7765 "done:\n\t" 7766 "MOV EAX,EDX\n\t" 7767 "SAR EDX,31\n\t" %} 7768 ins_encode %{ 7769 int con = (int)$imm$$constant; 7770 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7771 int pcon = (con > 0) ? 
con : -con; 7772 Label Lfast, Lpos, Ldone; 7773 7774 __ movl($tmp$$Register, pcon); 7775 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7776 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7777 7778 __ movl($tmp2$$Register, $dst$$Register); // save 7779 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7780 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7781 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7782 7783 // Negative dividend. 7784 // convert value to positive to use unsigned division 7785 __ lneg($dst$$Register, $tmp2$$Register); 7786 __ divl($tmp$$Register); 7787 __ movl($dst$$Register, $tmp2$$Register); 7788 __ divl($tmp$$Register); 7789 // revert remainder back to negative 7790 __ negl(HIGH_FROM_LOW($dst$$Register)); 7791 __ jmpb(Ldone); 7792 7793 __ bind(Lpos); 7794 __ divl($tmp$$Register); 7795 __ movl($dst$$Register, $tmp2$$Register); 7796 7797 __ bind(Lfast); 7798 // fast path: src is positive 7799 __ divl($tmp$$Register); 7800 7801 __ bind(Ldone); 7802 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7803 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7804 7805 %} 7806 ins_pipe( pipe_slow ); 7807 %} 7808 7809 // Integer Shift Instructions 7810 // Shift Left by one 7811 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7812 match(Set dst (LShiftI dst shift)); 7813 effect(KILL cr); 7814 7815 size(2); 7816 format %{ "SHL $dst,$shift" %} 7817 opcode(0xD1, 0x4); /* D1 /4 */ 7818 ins_encode( OpcP, RegOpc( dst ) ); 7819 ins_pipe( ialu_reg ); 7820 %} 7821 7822 // Shift Left by 8-bit immediate 7823 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7824 match(Set dst (LShiftI dst shift)); 7825 effect(KILL cr); 7826 7827 size(3); 7828 format %{ "SHL $dst,$shift" %} 7829 opcode(0xC1, 0x4); /* C1 /4 ib */ 7830 ins_encode( RegOpcImm( dst, shift) ); 7831 ins_pipe( ialu_reg ); 7832 %} 7833 7834 // Shift Left by variable 7835 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7836 match(Set dst (LShiftI dst shift)); 7837 effect(KILL cr); 7838 7839 size(2); 7840 format %{ "SHL $dst,$shift" %} 7841 opcode(0xD3, 0x4); /* D3 /4 */ 7842 ins_encode( OpcP, RegOpc( dst ) ); 7843 ins_pipe( ialu_reg_reg ); 7844 %} 7845 7846 // Arithmetic shift right by one 7847 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7848 match(Set dst (RShiftI dst shift)); 7849 effect(KILL cr); 7850 7851 size(2); 7852 format %{ "SAR $dst,$shift" %} 7853 opcode(0xD1, 0x7); /* D1 /7 */ 7854 ins_encode( OpcP, RegOpc( dst ) ); 7855 ins_pipe( ialu_reg ); 7856 %} 7857 7858 // Arithmetic shift right by one 7859 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7860 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7861 effect(KILL cr); 7862 format %{ "SAR $dst,$shift" %} 7863 opcode(0xD1, 0x7); /* D1 /7 */ 7864 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7865 ins_pipe( ialu_mem_imm ); 7866 %} 7867 7868 // Arithmetic Shift Right by 8-bit immediate 7869 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7870 match(Set dst (RShiftI dst shift)); 7871 effect(KILL cr); 7872 7873 size(3); 7874 format %{ "SAR $dst,$shift" %} 7875 opcode(0xC1, 0x7); /* C1 /7 ib */ 7876 ins_encode( RegOpcImm( dst, shift ) ); 7877 ins_pipe( ialu_mem_imm ); 7878 %} 7879 7880 // Arithmetic Shift Right by 8-bit immediate 7881 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7882 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7883 effect(KILL cr); 7884 7885 format %{ "SAR $dst,$shift" %} 7886 opcode(0xC1, 0x7); /* C1 /7 ib */ 7887 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7888 ins_pipe( ialu_mem_imm ); 7889 %} 7890 7891 // Arithmetic Shift Right by variable 7892 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7893 match(Set dst (RShiftI dst shift)); 7894 effect(KILL cr); 7895 7896 size(2); 7897 format %{ "SAR $dst,$shift" %} 7898 
opcode(0xD3, 0x7); /* D3 /7 */ 7899 ins_encode( OpcP, RegOpc( dst ) ); 7900 ins_pipe( ialu_reg_reg ); 7901 %} 7902 7903 // Logical shift right by one 7904 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7905 match(Set dst (URShiftI dst shift)); 7906 effect(KILL cr); 7907 7908 size(2); 7909 format %{ "SHR $dst,$shift" %} 7910 opcode(0xD1, 0x5); /* D1 /5 */ 7911 ins_encode( OpcP, RegOpc( dst ) ); 7912 ins_pipe( ialu_reg ); 7913 %} 7914 7915 // Logical Shift Right by 8-bit immediate 7916 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7917 match(Set dst (URShiftI dst shift)); 7918 effect(KILL cr); 7919 7920 size(3); 7921 format %{ "SHR $dst,$shift" %} 7922 opcode(0xC1, 0x5); /* C1 /5 ib */ 7923 ins_encode( RegOpcImm( dst, shift) ); 7924 ins_pipe( ialu_reg ); 7925 %} 7926 7927 7928 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7929 // This idiom is used by the compiler for the i2b bytecode. 7930 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7931 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7932 7933 size(3); 7934 format %{ "MOVSX $dst,$src :8" %} 7935 ins_encode %{ 7936 __ movsbl($dst$$Register, $src$$Register); 7937 %} 7938 ins_pipe(ialu_reg_reg); 7939 %} 7940 7941 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7942 // This idiom is used by the compiler the i2s bytecode. 
// int -> short: materialized as (x << 16) >> 16, emitted as a single MOVSX.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions (all guarded by the UseBMI1Instructions predicate)
// ANDN: dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 ideal shape.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the (0 - src) & src ideal shape.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + (-1)) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + (-1)) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Same encoding as orI_eReg; the CastP2X only reinterprets the pointer bits.
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These three have no match rule: they exist only as expand targets below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 for the OR of the
// two shifts to be a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Mirror images of the ROL expand templates above.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is emitted as NOT; no flags effect, hence no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Expand helper: plain register copy, used by convI2B below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand helper: NEG/ADC sequence that collapses any non-zero value to 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the Conv2B expansion above.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, built with SETcc + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Straight-line, branch-free encoding.  (An unused 'Label done' local,
    // left over from an earlier branching version, has been removed.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero is just an arithmetic shift of the sign bit.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// These produce only the flags result; USE_KILL on op1/op2 reflects that the
// input register is destroyed by the flag-setting operation.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow checks use CMP, which leaves op1 intact.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2 matched as a NEG, which destroys op2.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL into a TEMP keeps both inputs live.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8694 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8695 match(Set dst (SubL dst src)); 8696 effect(KILL cr); 8697 ins_cost(200); 8698 format %{ "SUB $dst.lo,$src.lo\n\t" 8699 "SBB $dst.hi,$src.hi" %} 8700 opcode(0x2B, 0x1B); 8701 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8702 ins_pipe( ialu_reg_reg_long ); 8703 %} 8704 8705 // Subtract Long Register with Immediate 8706 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8707 match(Set dst (SubL dst src)); 8708 effect(KILL cr); 8709 format %{ "SUB $dst.lo,$src.lo\n\t" 8710 "SBB $dst.hi,$src.hi" %} 8711 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8712 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8713 ins_pipe( ialu_reg_long ); 8714 %} 8715 8716 // Subtract Long Register with Memory 8717 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8718 match(Set dst (SubL dst (LoadL mem))); 8719 effect(KILL cr); 8720 ins_cost(125); 8721 format %{ "SUB $dst.lo,$mem\n\t" 8722 "SBB $dst.hi,$mem+4" %} 8723 opcode(0x2B, 0x1B); 8724 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8725 ins_pipe( ialu_reg_long_mem ); 8726 %} 8727 8728 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8729 match(Set dst (SubL zero dst)); 8730 effect(KILL cr); 8731 ins_cost(300); 8732 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8733 ins_encode( neg_long(dst) ); 8734 ins_pipe( ialu_reg_reg_long ); 8735 %} 8736 8737 // And Long Register with Register 8738 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8739 match(Set dst (AndL dst src)); 8740 effect(KILL cr); 8741 format %{ "AND $dst.lo,$src.lo\n\t" 8742 "AND $dst.hi,$src.hi" %} 8743 opcode(0x23,0x23); 8744 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8745 ins_pipe( ialu_reg_reg_long ); 8746 %} 8747 8748 // And Long Register with Immediate 8749 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8750 match(Set dst (AndL dst src)); 8751 effect(KILL 
cr); 8752 format %{ "AND $dst.lo,$src.lo\n\t" 8753 "AND $dst.hi,$src.hi" %} 8754 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8755 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8756 ins_pipe( ialu_reg_long ); 8757 %} 8758 8759 // And Long Register with Memory 8760 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8761 match(Set dst (AndL dst (LoadL mem))); 8762 effect(KILL cr); 8763 ins_cost(125); 8764 format %{ "AND $dst.lo,$mem\n\t" 8765 "AND $dst.hi,$mem+4" %} 8766 opcode(0x23, 0x23); 8767 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8768 ins_pipe( ialu_reg_long_mem ); 8769 %} 8770 8771 // BMI1 instructions 8772 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8773 match(Set dst (AndL (XorL src1 minus_1) src2)); 8774 predicate(UseBMI1Instructions); 8775 effect(KILL cr, TEMP dst); 8776 8777 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8778 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8779 %} 8780 8781 ins_encode %{ 8782 Register Rdst = $dst$$Register; 8783 Register Rsrc1 = $src1$$Register; 8784 Register Rsrc2 = $src2$$Register; 8785 __ andnl(Rdst, Rsrc1, Rsrc2); 8786 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8787 %} 8788 ins_pipe(ialu_reg_reg_long); 8789 %} 8790 8791 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8792 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8793 predicate(UseBMI1Instructions); 8794 effect(KILL cr, TEMP dst); 8795 8796 ins_cost(125); 8797 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8798 "ANDNL $dst.hi, $src1.hi, $src2+4" 8799 %} 8800 8801 ins_encode %{ 8802 Register Rdst = $dst$$Register; 8803 Register Rsrc1 = $src1$$Register; 8804 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8805 8806 __ andnl(Rdst, Rsrc1, $src2$$Address); 8807 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8808 %} 8809 ins_pipe(ialu_reg_mem); 8810 %} 8811 8812 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8813 match(Set dst (AndL (SubL imm_zero src) src)); 8814 predicate(UseBMI1Instructions); 8815 effect(KILL cr, TEMP dst); 8816 8817 format %{ "MOVL $dst.hi, 0\n\t" 8818 "BLSIL $dst.lo, $src.lo\n\t" 8819 "JNZ done\n\t" 8820 "BLSIL $dst.hi, $src.hi\n" 8821 "done:" 8822 %} 8823 8824 ins_encode %{ 8825 Label done; 8826 Register Rdst = $dst$$Register; 8827 Register Rsrc = $src$$Register; 8828 __ movl(HIGH_FROM_LOW(Rdst), 0); 8829 __ blsil(Rdst, Rsrc); 8830 __ jccb(Assembler::notZero, done); 8831 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8832 __ bind(done); 8833 %} 8834 ins_pipe(ialu_reg); 8835 %} 8836 8837 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8838 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8839 predicate(UseBMI1Instructions); 8840 effect(KILL cr, TEMP dst); 8841 8842 ins_cost(125); 8843 format %{ "MOVL $dst.hi, 0\n\t" 8844 "BLSIL $dst.lo, $src\n\t" 8845 "JNZ done\n\t" 8846 "BLSIL $dst.hi, $src+4\n" 8847 "done:" 8848 %} 8849 8850 ins_encode %{ 8851 Label done; 8852 Register Rdst = $dst$$Register; 8853 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8854 8855 __ movl(HIGH_FROM_LOW(Rdst), 0); 8856 __ blsil(Rdst, $src$$Address); 8857 __ jccb(Assembler::notZero, done); 8858 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8859 __ bind(done); 8860 %} 8861 ins_pipe(ialu_reg_mem); 8862 %} 8863 8864 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8865 %{ 8866 match(Set dst (XorL (AddL src minus_1) src)); 8867 predicate(UseBMI1Instructions); 8868 effect(KILL cr, TEMP dst); 8869 8870 format %{ "MOVL $dst.hi, 0\n\t" 8871 "BLSMSKL $dst.lo, $src.lo\n\t" 8872 "JNC done\n\t" 8873 "BLSMSKL $dst.hi, $src.hi\n" 8874 "done:" 8875 %} 8876 8877 ins_encode %{ 8878 Label done; 
8879 Register Rdst = $dst$$Register; 8880 Register Rsrc = $src$$Register; 8881 __ movl(HIGH_FROM_LOW(Rdst), 0); 8882 __ blsmskl(Rdst, Rsrc); 8883 __ jccb(Assembler::carryClear, done); 8884 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8885 __ bind(done); 8886 %} 8887 8888 ins_pipe(ialu_reg); 8889 %} 8890 8891 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8892 %{ 8893 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8894 predicate(UseBMI1Instructions); 8895 effect(KILL cr, TEMP dst); 8896 8897 ins_cost(125); 8898 format %{ "MOVL $dst.hi, 0\n\t" 8899 "BLSMSKL $dst.lo, $src\n\t" 8900 "JNC done\n\t" 8901 "BLSMSKL $dst.hi, $src+4\n" 8902 "done:" 8903 %} 8904 8905 ins_encode %{ 8906 Label done; 8907 Register Rdst = $dst$$Register; 8908 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8909 8910 __ movl(HIGH_FROM_LOW(Rdst), 0); 8911 __ blsmskl(Rdst, $src$$Address); 8912 __ jccb(Assembler::carryClear, done); 8913 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8914 __ bind(done); 8915 %} 8916 8917 ins_pipe(ialu_reg_mem); 8918 %} 8919 8920 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8921 %{ 8922 match(Set dst (AndL (AddL src minus_1) src) ); 8923 predicate(UseBMI1Instructions); 8924 effect(KILL cr, TEMP dst); 8925 8926 format %{ "MOVL $dst.hi, $src.hi\n\t" 8927 "BLSRL $dst.lo, $src.lo\n\t" 8928 "JNC done\n\t" 8929 "BLSRL $dst.hi, $src.hi\n" 8930 "done:" 8931 %} 8932 8933 ins_encode %{ 8934 Label done; 8935 Register Rdst = $dst$$Register; 8936 Register Rsrc = $src$$Register; 8937 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8938 __ blsrl(Rdst, Rsrc); 8939 __ jccb(Assembler::carryClear, done); 8940 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8941 __ bind(done); 8942 %} 8943 8944 ins_pipe(ialu_reg); 8945 %} 8946 8947 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8948 %{ 8949 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8950 predicate(UseBMI1Instructions); 8951 effect(KILL cr, TEMP dst); 8952 8953 ins_cost(125); 8954 format %{ "MOVL $dst.hi, $src+4\n\t" 8955 "BLSRL $dst.lo, $src\n\t" 8956 "JNC done\n\t" 8957 "BLSRL $dst.hi, $src+4\n" 8958 "done:" 8959 %} 8960 8961 ins_encode %{ 8962 Label done; 8963 Register Rdst = $dst$$Register; 8964 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8965 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8966 __ blsrl(Rdst, $src$$Address); 8967 __ jccb(Assembler::carryClear, done); 8968 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8969 __ bind(done); 8970 %} 8971 8972 ins_pipe(ialu_reg_mem); 8973 %} 8974 8975 // Or Long Register with Register 8976 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8977 match(Set dst (OrL dst src)); 8978 effect(KILL cr); 8979 format %{ "OR $dst.lo,$src.lo\n\t" 8980 "OR $dst.hi,$src.hi" %} 8981 opcode(0x0B,0x0B); 8982 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8983 ins_pipe( ialu_reg_reg_long ); 8984 %} 8985 8986 // Or Long Register with Immediate 8987 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8988 match(Set dst (OrL dst src)); 8989 effect(KILL cr); 8990 format %{ "OR $dst.lo,$src.lo\n\t" 8991 "OR $dst.hi,$src.hi" %} 8992 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 8993 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8994 ins_pipe( ialu_reg_long ); 8995 %} 8996 8997 // Or Long Register with Memory 8998 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8999 match(Set dst (OrL dst (LoadL mem))); 9000 effect(KILL cr); 9001 ins_cost(125); 9002 format %{ "OR $dst.lo,$mem\n\t" 9003 "OR $dst.hi,$mem+4" %} 9004 opcode(0x0B,0x0B); 9005 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9006 ins_pipe( ialu_reg_long_mem ); 9007 %} 9008 9009 // Xor Long Register with Register 9010 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9011 
match(Set dst (XorL dst src)); 9012 effect(KILL cr); 9013 format %{ "XOR $dst.lo,$src.lo\n\t" 9014 "XOR $dst.hi,$src.hi" %} 9015 opcode(0x33,0x33); 9016 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9017 ins_pipe( ialu_reg_reg_long ); 9018 %} 9019 9020 // Xor Long Register with Immediate -1 9021 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9022 match(Set dst (XorL dst imm)); 9023 format %{ "NOT $dst.lo\n\t" 9024 "NOT $dst.hi" %} 9025 ins_encode %{ 9026 __ notl($dst$$Register); 9027 __ notl(HIGH_FROM_LOW($dst$$Register)); 9028 %} 9029 ins_pipe( ialu_reg_long ); 9030 %} 9031 9032 // Xor Long Register with Immediate 9033 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9034 match(Set dst (XorL dst src)); 9035 effect(KILL cr); 9036 format %{ "XOR $dst.lo,$src.lo\n\t" 9037 "XOR $dst.hi,$src.hi" %} 9038 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9039 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9040 ins_pipe( ialu_reg_long ); 9041 %} 9042 9043 // Xor Long Register with Memory 9044 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9045 match(Set dst (XorL dst (LoadL mem))); 9046 effect(KILL cr); 9047 ins_cost(125); 9048 format %{ "XOR $dst.lo,$mem\n\t" 9049 "XOR $dst.hi,$mem+4" %} 9050 opcode(0x33,0x33); 9051 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9052 ins_pipe( ialu_reg_long_mem ); 9053 %} 9054 9055 // Shift Left Long by 1 9056 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9057 predicate(UseNewLongLShift); 9058 match(Set dst (LShiftL dst cnt)); 9059 effect(KILL cr); 9060 ins_cost(100); 9061 format %{ "ADD $dst.lo,$dst.lo\n\t" 9062 "ADC $dst.hi,$dst.hi" %} 9063 ins_encode %{ 9064 __ addl($dst$$Register,$dst$$Register); 9065 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9066 %} 9067 ins_pipe( ialu_reg_long ); 9068 %} 9069 9070 // Shift Left Long by 2 9071 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9072 
predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode(
Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe(
fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (DivD dst src));
  // NOTE(review): a second, stricter predicate clause follows the one above;
  // presumably ADLC keeps only one of them -- confirm which clause wins
  // before removing either.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  ins_cost(01); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
            "\tMOVSD [ESP+0],$src1\n"
            "\tFLD_D [ESP+0]\n"
            "\tMOVSD [ESP+0],$src0\n"
            "\tFLD_D [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_D [ESP+0]\n"
            "\tMOVSD $dst,[ESP+0]\n"
            "\tADD ESP,8\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

instruct sinD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

instruct cosD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2), //
fptan
              Opcode(0xDD), Opcode(0xD8)); // fstp st
  ins_pipe( pipe_slow );
%}

instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcD(dst),
              Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8), // fstp st
              Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y)); // Raise X to the Yth power
  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
  format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ subptr(rsp, 8);
    __ fld_s($X$$reg - 1);
    __ fast_pow();
    __ addptr(rsp, 8);
  %}
  ins_pipe( pipe_slow );
%}

instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
  format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %}
  ins_encode %{
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fast_pow();
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}
  ins_pipe( pipe_slow );
%}

instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}

instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Push_SrcD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultD(dst));

  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
// fcompp();
// fwait(); fnstsw_ax();
// sahf();
// movl(dst, unordered_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);
// exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{
"UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Float compare (register vs. memory) for users of the raw unordered-compare
// flags (eFlagsRegUCF): no NaN fixup sequence is emitted.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value of the x87 top-of-stack value (FABS clears the sign bit).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negation of the x87 top-of-stack value (FCHS flips the sign bit).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): format shows FSTP_S, but the encoding pops to a register
// (Pop_Reg_FPR) — display-only mismatch; confirm against disassembly.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision 10527 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10528 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10529 match(Set dst (ModF src1 src2)); 10530 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10531 10532 format %{ "FMOD $dst,$src1,$src2" %} 10533 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10534 emitModDPR(), 10535 Push_Result_Mod_DPR(src2), 10536 Pop_Mem_FPR(dst)); 10537 ins_pipe( pipe_slow ); 10538 %} 10539 // 10540 // This instruction does not round to 24-bits 10541 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10542 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10543 match(Set dst (ModF dst src)); 10544 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10545 10546 format %{ "FMOD $dst,$src" %} 10547 ins_encode(Push_Reg_Mod_DPR(dst, src), 10548 emitModDPR(), 10549 Push_Result_Mod_DPR(src), 10550 Pop_Reg_FPR(dst)); 10551 ins_pipe( pipe_slow ); 10552 %} 10553 10554 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10555 predicate(UseSSE>=1); 10556 match(Set dst (ModF src0 src1)); 10557 effect(KILL rax, KILL cr); 10558 format %{ "SUB ESP,4\t # FMOD\n" 10559 "\tMOVSS [ESP+0],$src1\n" 10560 "\tFLD_S [ESP+0]\n" 10561 "\tMOVSS [ESP+0],$src0\n" 10562 "\tFLD_S [ESP+0]\n" 10563 "loop:\tFPREM\n" 10564 "\tFWAIT\n" 10565 "\tFNSTSW AX\n" 10566 "\tSAHF\n" 10567 "\tJP loop\n" 10568 "\tFSTP_S [ESP+0]\n" 10569 "\tMOVSS $dst,[ESP+0]\n" 10570 "\tADD ESP,4\n" 10571 "\tFSTP ST0\t # Restore FPU Stack" 10572 %} 10573 ins_cost(250); 10574 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10575 ins_pipe( pipe_slow ); 10576 %} 10577 10578 10579 //----------Arithmetic Conversion Instructions--------------------------------- 10580 // The conversions operations are all Alpha sorted. Please keep it that way! 

// Round an x87 float to 24-bit (single) precision by storing it out to
// a stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value to 53-bit (double) precision by storing it out to
// a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not on the FPU stack top: load it there first, then
      // pop-store; otherwise a plain (non-popping) store suffices.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen float to double on the x87 stack (no rounding needed).
// NOTE(review): format shows FST_S for a F2D conversion — display-only;
// confirm against the Pop_Reg_Reg_DPR encoding.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float to x87 double: bounce through a stack slot.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; that sentinel sends
    // us to the stub which implements the exact Java corner-case semantics.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 is the FISTP overflow/NaN sentinel; take the
    // slow path through the stub for those cases.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields 0x80000000 on overflow/NaN; that sentinel sends
    // us to the stub for the exact Java corner-case semantics.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 is the FISTP overflow/NaN sentinel; take the
    // slow path through the stub for those cases.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int (spilled to a stack slot) to x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int to XMM double via CVTSI2SD.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Loaded int to XMM double, folding the load into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int to XMM double via MOVD + CVTDQ2PD (the UseXmmI2D variant).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Loaded int to x87 double, folding the load into FILD.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Input was masked to 8 bits (AndI with 255), so the value fits exactly
// in a 24-bit float mantissa and no rounding pass is required.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int to XMM float via MOVD + CVTDQ2PS (the UseXmmI2F variant).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the
// high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to x87 double: push both halves, FILD as a 64-bit int.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to XMM double: FILD on the FPU, round through memory into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to XMM float: FILD on the FPU, round through memory into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to x87 float.
// NOTE(review): no UseSSE predicate here, unlike convL2F_reg above —
// presumably ADLC's cost/priority rules keep them from clashing; confirm.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Narrow long to int: just copy the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits (already spilled to the stack) as an int.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret an x87 float's bits by storing it to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret an XMM float's bits by storing it to a stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float bits directly into a GPR with MOVD.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t#
MoveI2F_reg_stack" %} 11240 ins_encode %{ 11241 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11242 %} 11243 ins_pipe( ialu_mem_reg ); 11244 %} 11245 11246 11247 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11248 predicate(UseSSE==0); 11249 match(Set dst (MoveI2F src)); 11250 effect(DEF dst, USE src); 11251 11252 ins_cost(125); 11253 format %{ "FLD_S $src\n\t" 11254 "FSTP $dst\t# MoveI2F_stack_reg" %} 11255 opcode(0xD9); /* D9 /0, FLD m32real */ 11256 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11257 Pop_Reg_FPR(dst) ); 11258 ins_pipe( fpu_reg_mem ); 11259 %} 11260 11261 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11262 predicate(UseSSE>=1); 11263 match(Set dst (MoveI2F src)); 11264 effect( DEF dst, USE src ); 11265 11266 ins_cost(95); 11267 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11268 ins_encode %{ 11269 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11270 %} 11271 ins_pipe( pipe_slow ); 11272 %} 11273 11274 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11275 predicate(UseSSE>=2); 11276 match(Set dst (MoveI2F src)); 11277 effect( DEF dst, USE src ); 11278 11279 ins_cost(85); 11280 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11281 ins_encode %{ 11282 __ movdl($dst$$XMMRegister, $src$$Register); 11283 %} 11284 ins_pipe( pipe_slow ); 11285 %} 11286 11287 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11288 match(Set dst (MoveD2L src)); 11289 effect(DEF dst, USE src); 11290 11291 ins_cost(250); 11292 format %{ "MOV $dst.lo,$src\n\t" 11293 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11294 opcode(0x8B, 0x8B); 11295 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11296 ins_pipe( ialu_mem_long_reg ); 11297 %} 11298 11299 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11300 predicate(UseSSE<=1); 11301 match(Set dst (MoveD2L src)); 11302 effect(DEF dst, USE src); 11303 11304 ins_cost(125); 11305 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11306 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11307 ins_pipe( fpu_mem_reg ); 11308 %} 11309 11310 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11311 predicate(UseSSE>=2); 11312 match(Set dst (MoveD2L src)); 11313 effect(DEF dst, USE src); 11314 ins_cost(95); 11315 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11316 ins_encode %{ 11317 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11318 %} 11319 ins_pipe( pipe_slow ); 11320 %} 11321 11322 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11323 predicate(UseSSE>=2); 11324 match(Set dst (MoveD2L src)); 11325 effect(DEF dst, USE src, TEMP tmp); 11326 ins_cost(85); 11327 format %{ "MOVD $dst.lo,$src\n\t" 11328 "PSHUFLW $tmp,$src,0x4E\n\t" 11329 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11330 ins_encode %{ 11331 __ movdl($dst$$Register, $src$$XMMRegister); 11332 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11333 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11334 %} 11335 ins_pipe( pipe_slow ); 11336 %} 11337 11338 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11339 match(Set dst (MoveL2D src)); 11340 effect(DEF dst, USE src); 11341 11342 ins_cost(200); 11343 format %{ "MOV $dst,$src.lo\n\t" 11344 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11345 opcode(0x89, 0x89); 11346 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11347 ins_pipe( ialu_mem_long_reg ); 11348 %} 11349 11350 11351 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11352 predicate(UseSSE<=1); 11353 match(Set dst (MoveL2D src)); 11354 effect(DEF dst, USE src); 11355 ins_cost(125); 11356 11357 format %{ "FLD_D $src\n\t" 11358 "FSTP $dst\t# MoveL2D_stack_reg" %} 11359 opcode(0xDD); /* DD /0, FLD m64real */ 11360 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11361 Pop_Reg_DPR(dst) ); 11362 ins_pipe( fpu_reg_mem ); 11363 %} 11364 11365 11366 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11367 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11368 match(Set dst (MoveL2D src)); 11369 effect(DEF dst, USE src); 11370 11371 ins_cost(95); 11372 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11373 ins_encode %{ 11374 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11375 %} 11376 ins_pipe( pipe_slow ); 11377 %} 11378 11379 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11380 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11381 match(Set dst (MoveL2D src)); 11382 effect(DEF dst, USE src); 11383 11384 ins_cost(95); 11385 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11386 ins_encode %{ 11387 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11388 %} 11389 ins_pipe( pipe_slow ); 11390 %} 11391 11392 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11393 predicate(UseSSE>=2); 11394 match(Set dst (MoveL2D src)); 11395 effect(TEMP dst, USE src, TEMP tmp); 11396 ins_cost(85); 11397 format %{ "MOVD $dst,$src.lo\n\t" 11398 "MOVD $tmp,$src.hi\n\t" 11399 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11400 ins_encode %{ 11401 __ movdl($dst$$XMMRegister, $src$$Register); 11402 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11403 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11404 %} 11405 ins_pipe( pipe_slow ); 11406 %} 11407 11408 11409 // ======================================================================= 11410 // fast clearing of an array 11411 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11412 predicate(!UseFastStosb); 11413 match(Set dummy (ClearArray cnt base)); 11414 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11415 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11416 "SHL ECX,1\t# Convert doublewords to words\n\t" 11417 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11418 ins_encode %{ 11419 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11425 predicate(UseFastStosb); 11426 match(Set dummy (ClearArray cnt base)); 11427 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11428 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11429 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11430 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11431 ins_encode %{ 11432 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11433 %} 11434 ins_pipe( pipe_slow ); 11435 %} 11436 11437 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11438 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11439 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11440 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11441 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11442 11443 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11444 ins_encode %{ 11445 __ string_compare($str1$$Register, $str2$$Register, 11446 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11447 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11448 %} 11449 ins_pipe( pipe_slow ); 11450 %} 11451 11452 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11453 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11454 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11455 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11456 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11457 11458 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11459 ins_encode %{ 11460 __ string_compare($str1$$Register, $str2$$Register, 11461 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11462 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11463 %} 11464 ins_pipe( pipe_slow ); 11465 %} 11466 11467 instruct string_compareLU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11468 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11469 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11470 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11471 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11472 11473 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11474 ins_encode %{ 11475 __ string_compare($str1$$Register, $str2$$Register, 11476 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11477 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11478 %} 11479 ins_pipe( pipe_slow ); 11480 %} 11481 11482 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11483 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11484 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11485 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11486 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11487 11488 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11489 ins_encode %{ 11490 __ string_compare($str2$$Register, $str1$$Register, 11491 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11492 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11493 %} 11494 ins_pipe( pipe_slow ); 11495 %} 11496 11497 // fast string equals 11498 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11499 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11500 match(Set result (StrEquals (Binary str1 str2) cnt)); 11501 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11502 11503 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11504 ins_encode %{ 11505 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11506 $cnt$$Register, $result$$Register, $tmp3$$Register, 11507 $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, false /* char */); 11508 %} 11509 11510 ins_pipe( pipe_slow ); 11511 %} 11512 11513 // fast search of substring with known size. 11514 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11515 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11516 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11517 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11518 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11519 11520 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11521 ins_encode %{ 11522 int icnt2 = (int)$int_cnt2$$constant; 11523 if (icnt2 >= 16) { 11524 // IndexOf for constant substrings with size >= 16 elements 11525 // which don't need to be loaded through stack. 11526 __ string_indexofC8($str1$$Register, $str2$$Register, 11527 $cnt1$$Register, $cnt2$$Register, 11528 icnt2, $result$$Register, 11529 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11530 } else { 11531 // Small strings are loaded through stack if they cross page boundary. 11532 __ string_indexof($str1$$Register, $str2$$Register, 11533 $cnt1$$Register, $cnt2$$Register, 11534 icnt2, $result$$Register, 11535 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11536 } 11537 %} 11538 ins_pipe( pipe_slow ); 11539 %} 11540 11541 // fast search of substring with known size. 
11542 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11543 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11544 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11545 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11546 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11547 11548 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11549 ins_encode %{ 11550 int icnt2 = (int)$int_cnt2$$constant; 11551 if (icnt2 >= 8) { 11552 // IndexOf for constant substrings with size >= 8 elements 11553 // which don't need to be loaded through stack. 11554 __ string_indexofC8($str1$$Register, $str2$$Register, 11555 $cnt1$$Register, $cnt2$$Register, 11556 icnt2, $result$$Register, 11557 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11558 } else { 11559 // Small strings are loaded through stack if they cross page boundary. 11560 __ string_indexof($str1$$Register, $str2$$Register, 11561 $cnt1$$Register, $cnt2$$Register, 11562 icnt2, $result$$Register, 11563 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11564 } 11565 %} 11566 ins_pipe( pipe_slow ); 11567 %} 11568 11569 // fast search of substring with known size. 
11570 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11571 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11572 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11573 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11574 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11575 11576 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11577 ins_encode %{ 11578 int icnt2 = (int)$int_cnt2$$constant; 11579 if (icnt2 >= 8) { 11580 // IndexOf for constant substrings with size >= 8 elements 11581 // which don't need to be loaded through stack. 11582 __ string_indexofC8($str1$$Register, $str2$$Register, 11583 $cnt1$$Register, $cnt2$$Register, 11584 icnt2, $result$$Register, 11585 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11586 } else { 11587 // Small strings are loaded through stack if they cross page boundary. 
11588 __ string_indexof($str1$$Register, $str2$$Register, 11589 $cnt1$$Register, $cnt2$$Register, 11590 icnt2, $result$$Register, 11591 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11592 } 11593 %} 11594 ins_pipe( pipe_slow ); 11595 %} 11596 11597 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11598 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11599 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11600 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11601 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11602 11603 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11604 ins_encode %{ 11605 __ string_indexof($str1$$Register, $str2$$Register, 11606 $cnt1$$Register, $cnt2$$Register, 11607 (-1), $result$$Register, 11608 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11609 %} 11610 ins_pipe( pipe_slow ); 11611 %} 11612 11613 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11614 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11615 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11616 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11617 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11618 11619 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11620 ins_encode %{ 11621 __ string_indexof($str1$$Register, $str2$$Register, 11622 $cnt1$$Register, $cnt2$$Register, 11623 (-1), $result$$Register, 11624 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11625 %} 11626 ins_pipe( pipe_slow ); 11627 %} 11628 11629 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11630 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11631 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11632 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11633 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11634 11635 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11636 ins_encode %{ 11637 __ string_indexof($str1$$Register, $str2$$Register, 11638 $cnt1$$Register, $cnt2$$Register, 11639 (-1), $result$$Register, 11640 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11641 %} 11642 ins_pipe( pipe_slow ); 11643 %} 11644 11645 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11646 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11647 predicate(UseSSE42Intrinsics); 11648 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11649 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11650 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11651 ins_encode %{ 11652 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11653 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11654 %} 11655 ins_pipe( pipe_slow ); 11656 %} 11657 11658 // fast array equals 11659 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11660 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11661 %{ 11662 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11663 match(Set result (AryEq ary1 ary2)); 11664 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11665 //ins_cost(300); 11666 11667 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11668 ins_encode %{ 11669 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11670 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11671 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11672 %} 11673 ins_pipe( pipe_slow ); 11674 %} 11675 11676 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11677 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11678 %{ 11679 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11680 match(Set result (AryEq ary1 ary2)); 11681 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11682 //ins_cost(300); 11683 11684 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11685 ins_encode %{ 11686 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11687 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11688 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11689 %} 11690 ins_pipe( pipe_slow ); 11691 %} 11692 11693 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11694 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11695 %{ 11696 match(Set result (HasNegatives ary1 len)); 11697 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11698 11699 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11700 ins_encode %{ 11701 __ has_negatives($ary1$$Register, $len$$Register, 11702 $result$$Register, $tmp3$$Register, 11703 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11704 %} 11705 ins_pipe( pipe_slow ); 11706 %} 11707 11708 // fast char[] to byte[] compression 11709 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11710 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11711 match(Set result (StrCompressedCopy src (Binary dst len))); 11712 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11713 11714 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11715 ins_encode %{ 11716 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11717 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11718 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11719 %} 11720 ins_pipe( pipe_slow ); 11721 %} 11722 11723 // fast byte[] to char[] inflation 11724 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11725 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11726 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11727 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11728 11729 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11730 ins_encode %{ 11731 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11732 $tmp1$$XMMRegister, $tmp2$$Register); 11733 %} 11734 ins_pipe( pipe_slow ); 11735 %} 11736 11737 // encode char[] to byte[] in ISO_8859_1 11738 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11739 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11740 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11741 match(Set result (EncodeISOArray src (Binary dst len))); 11742 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11743 11744 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11745 ins_encode %{ 11746 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11747 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11748 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11749 %} 11750 ins_pipe( pipe_slow ); 11751 %} 11752 11753 11754 //----------Control Flow Instructions------------------------------------------ 11755 // Signed compare Instructions 11756 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11757 match(Set cr (CmpI op1 op2)); 11758 effect( DEF cr, USE op1, USE op2 ); 11759 format %{ "CMP $op1,$op2" %} 11760 opcode(0x3B); /* Opcode 3B /r */ 11761 
ins_encode( OpcP, RegReg( op1, op2) ); 11762 ins_pipe( ialu_cr_reg_reg ); 11763 %} 11764 11765 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11766 match(Set cr (CmpI op1 op2)); 11767 effect( DEF cr, USE op1 ); 11768 format %{ "CMP $op1,$op2" %} 11769 opcode(0x81,0x07); /* Opcode 81 /7 */ 11770 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11771 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11772 ins_pipe( ialu_cr_reg_imm ); 11773 %} 11774 11775 // Cisc-spilled version of cmpI_eReg 11776 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11777 match(Set cr (CmpI op1 (LoadI op2))); 11778 11779 format %{ "CMP $op1,$op2" %} 11780 ins_cost(500); 11781 opcode(0x3B); /* Opcode 3B /r */ 11782 ins_encode( OpcP, RegMem( op1, op2) ); 11783 ins_pipe( ialu_cr_reg_mem ); 11784 %} 11785 11786 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11787 match(Set cr (CmpI src zero)); 11788 effect( DEF cr, USE src ); 11789 11790 format %{ "TEST $src,$src" %} 11791 opcode(0x85); 11792 ins_encode( OpcP, RegReg( src, src ) ); 11793 ins_pipe( ialu_cr_reg_imm ); 11794 %} 11795 11796 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11797 match(Set cr (CmpI (AndI src con) zero)); 11798 11799 format %{ "TEST $src,$con" %} 11800 opcode(0xF7,0x00); 11801 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11802 ins_pipe( ialu_cr_reg_imm ); 11803 %} 11804 11805 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11806 match(Set cr (CmpI (AndI src mem) zero)); 11807 11808 format %{ "TEST $src,$mem" %} 11809 opcode(0x85); 11810 ins_encode( OpcP, RegMem( src, mem ) ); 11811 ins_pipe( ialu_cr_reg_mem ); 11812 %} 11813 11814 // Unsigned compare Instructions; really, same as signed except they 11815 // produce an eFlagsRegU instead of eFlagsReg. 
11816 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11817 match(Set cr (CmpU op1 op2)); 11818 11819 format %{ "CMPu $op1,$op2" %} 11820 opcode(0x3B); /* Opcode 3B /r */ 11821 ins_encode( OpcP, RegReg( op1, op2) ); 11822 ins_pipe( ialu_cr_reg_reg ); 11823 %} 11824 11825 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11826 match(Set cr (CmpU op1 op2)); 11827 11828 format %{ "CMPu $op1,$op2" %} 11829 opcode(0x81,0x07); /* Opcode 81 /7 */ 11830 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11831 ins_pipe( ialu_cr_reg_imm ); 11832 %} 11833 11834 // // Cisc-spilled version of cmpU_eReg 11835 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11836 match(Set cr (CmpU op1 (LoadI op2))); 11837 11838 format %{ "CMPu $op1,$op2" %} 11839 ins_cost(500); 11840 opcode(0x3B); /* Opcode 3B /r */ 11841 ins_encode( OpcP, RegMem( op1, op2) ); 11842 ins_pipe( ialu_cr_reg_mem ); 11843 %} 11844 11845 // // Cisc-spilled version of cmpU_eReg 11846 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11847 // match(Set cr (CmpU (LoadI op1) op2)); 11848 // 11849 // format %{ "CMPu $op1,$op2" %} 11850 // ins_cost(500); 11851 // opcode(0x39); /* Opcode 39 /r */ 11852 // ins_encode( OpcP, RegMem( op1, op2) ); 11853 //%} 11854 11855 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11856 match(Set cr (CmpU src zero)); 11857 11858 format %{ "TESTu $src,$src" %} 11859 opcode(0x85); 11860 ins_encode( OpcP, RegReg( src, src ) ); 11861 ins_pipe( ialu_cr_reg_imm ); 11862 %} 11863 11864 // Unsigned pointer compare Instructions 11865 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11866 match(Set cr (CmpP op1 op2)); 11867 11868 format %{ "CMPu $op1,$op2" %} 11869 opcode(0x3B); /* Opcode 3B /r */ 11870 ins_encode( OpcP, RegReg( op1, op2) ); 11871 ins_pipe( ialu_cr_reg_reg ); 11872 %} 11873 11874 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11875 match(Set cr (CmpP op1 op2)); 11876 11877 format %{ 
"CMPu $op1,$op2" %} 11878 opcode(0x81,0x07); /* Opcode 81 /7 */ 11879 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11880 ins_pipe( ialu_cr_reg_imm ); 11881 %} 11882 11883 // // Cisc-spilled version of cmpP_eReg 11884 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11885 match(Set cr (CmpP op1 (LoadP op2))); 11886 11887 format %{ "CMPu $op1,$op2" %} 11888 ins_cost(500); 11889 opcode(0x3B); /* Opcode 3B /r */ 11890 ins_encode( OpcP, RegMem( op1, op2) ); 11891 ins_pipe( ialu_cr_reg_mem ); 11892 %} 11893 11894 // // Cisc-spilled version of cmpP_eReg 11895 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11896 // match(Set cr (CmpP (LoadP op1) op2)); 11897 // 11898 // format %{ "CMPu $op1,$op2" %} 11899 // ins_cost(500); 11900 // opcode(0x39); /* Opcode 39 /r */ 11901 // ins_encode( OpcP, RegMem( op1, op2) ); 11902 //%} 11903 11904 // Compare raw pointer (used in out-of-heap check). 11905 // Only works because non-oop pointers must be raw pointers 11906 // and raw pointers have no anti-dependencies. 11907 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11908 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11909 match(Set cr (CmpP op1 (LoadP op2))); 11910 11911 format %{ "CMPu $op1,$op2" %} 11912 opcode(0x3B); /* Opcode 3B /r */ 11913 ins_encode( OpcP, RegMem( op1, op2) ); 11914 ins_pipe( ialu_cr_reg_mem ); 11915 %} 11916 11917 // 11918 // This will generate a signed flags result. This should be ok 11919 // since any compare to a zero should be eq/neq. 11920 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11921 match(Set cr (CmpP src zero)); 11922 11923 format %{ "TEST $src,$src" %} 11924 opcode(0x85); 11925 ins_encode( OpcP, RegReg( src, src ) ); 11926 ins_pipe( ialu_cr_reg_imm ); 11927 %} 11928 11929 // Cisc-spilled version of testP_reg 11930 // This will generate a signed flags result. This should be ok 11931 // since any compare to a zero should be eq/neq. 
11932 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11933 match(Set cr (CmpP (LoadP op) zero)); 11934 11935 format %{ "TEST $op,0xFFFFFFFF" %} 11936 ins_cost(500); 11937 opcode(0xF7); /* Opcode F7 /0 */ 11938 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11939 ins_pipe( ialu_cr_reg_imm ); 11940 %} 11941 11942 // Yanked all unsigned pointer compare operations. 11943 // Pointer compares are done with CmpP which is already unsigned. 11944 11945 //----------Max and Min-------------------------------------------------------- 11946 // Min Instructions 11947 //// 11948 // *** Min and Max using the conditional move are slower than the 11949 // *** branch version on a Pentium III. 11950 // // Conditional move for min 11951 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11952 // effect( USE_DEF op2, USE op1, USE cr ); 11953 // format %{ "CMOVlt $op2,$op1\t! min" %} 11954 // opcode(0x4C,0x0F); 11955 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11956 // ins_pipe( pipe_cmov_reg ); 11957 //%} 11958 // 11959 //// Min Register with Register (P6 version) 11960 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11961 // predicate(VM_Version::supports_cmov() ); 11962 // match(Set op2 (MinI op1 op2)); 11963 // ins_cost(200); 11964 // expand %{ 11965 // eFlagsReg cr; 11966 // compI_eReg(cr,op1,op2); 11967 // cmovI_reg_lt(op2,op1,cr); 11968 // %} 11969 //%} 11970 11971 // Min Register with Register (generic version) 11972 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11973 match(Set dst (MinI dst src)); 11974 effect(KILL flags); 11975 ins_cost(300); 11976 11977 format %{ "MIN $dst,$src" %} 11978 opcode(0xCC); 11979 ins_encode( min_enc(dst,src) ); 11980 ins_pipe( pipe_slow ); 11981 %} 11982 11983 // Max Register with Register 11984 // *** Min and Max using the conditional move are slower than the 11985 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Kills the condition codes; the instruction bytes come from the max_enc
// encoding class (the 0xCC opcode is presumably a placeholder — confirm
// against max_enc).
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit (EAX:EDX) intermediate.  EAX/EDX are fixed by the
// eAXRegI/eDXRegI operands because CDQ and IDIV implicitly use them.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Strides of +/-1 are strength-reduced elsewhere; the division below
    // assumes |strd| >= 2.
    assert(strd != 1 && strd != -1, "sanity");
    // (Removed unused local 'int m1 = (strd > 0) ? 1 : -1;' — the stride
    // sign is handled explicitly by the branches below.)
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);   // E9 + rel32
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);   // 0F 8x + rel32
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch for compares whose condition involves the parity
// flag (unordered float results): ne needs an extra JP taken-branch,
// eq needs a JP that skips the JE.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow path of the subtype check: scan $sub's secondary-supers array for
// $super.  Fixed registers (ESI/EAX/ECX/EDI) are dictated by the REPNE SCASD
// idiom emitted by enc_PartialSubtypeCheck.  Clobbers ECX and the flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the subtype check is consumed
// (compared against null): no XOR of EDI is needed, so EDI is just killed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // EB + rel8
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2); // 7x + rel8
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch version of jmpConUCF2: two short conditional jumps (see the
// long version for the parity-flag rationale).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4); // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst for a 64-bit signed compare of two register
// pairs: high words compared signed (JL/JG), low words unsigned (JB),
// which is the standard two-word signed-compare decomposition.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    // Start from 0 so INC/DEC below yield +1/-1.
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// For LT/GE against zero only the sign of the high word matters, so a
// single TEST of the high half suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP/SBB sequence (via long_cmp_flags2) leaves the sign/overflow flags
// valid for a signed 64-bit LT/GE test.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): added parentheses around the BoolTest disjunction in the
// four FP predicates below — previously `UseSSE<=1 && lt || ge` parsed as
// `(UseSSE<=1 && lt) || ge`, so the UseSSE guard did not apply to the
// second test, unlike the parenthesized integer cmov rules above.  In
// practice the FP register-class operands likely masked this; verify
// against the regDPR/regD operand predicates.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// OR of the two halves is zero iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long (both halves) on the EQ/NE outcome of a long compare.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long loaded from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE an int on the EQ/NE outcome of a long compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the source int loaded from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// CMOVE a pointer on the EQ/NE outcome of a long compare.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): added parentheses around the BoolTest disjunction in the
// four FP predicates below — previously `UseSSE<=1 && eq || ne` parsed as
// `(UseSSE<=1 && eq) || ne`, so the UseSSE guard did not apply to the
// second test, unlike the parenthesized integer cmov rules above.  Likely
// masked in practice by the regDPR/regD operand predicates — verify.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
// (GT/LE use the commuted condition, hence cmpOp_commute).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long loaded from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE an int on the LE/GT outcome of a (commuted) long compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the source int loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer, predicated on a long (LE/GT) comparison.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 register form).
// FIX: the BoolTest disjunction is now parenthesized.  '&&' binds tighter
// than '||', so the previous form
//   UseSSE<=1 && ...==BoolTest::le || ...==BoolTest::gt
// applied the UseSSE guard only to the LE case and let the GT case match
// regardless of the UseSSE setting -- inconsistent with every other *_LEGT
// rule in this section, which all write supports_cmov() && ( le || gt ).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register form, SSE2+).
// Same precedence fix as cmovDDPR_reg_LEGT above.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, no SSE).
// Same precedence fix as cmovDDPR_reg_LEGT above.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM register form, SSE1+).
// Same precedence fix as cmovDDPR_reg_LEGT above.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is loaded with a placeholder oop before the call; presumably this
  // is the inline-cache slot patched at resolution time -- see
  // Java_Dynamic_Call's definition to confirm.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU: no float-stack housekeeping
// around the call, just the call itself.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  // Indirect JMP through a register; the callee finds its method oop in EBX
  // (enforced by the eBXRegP operand class).
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // POP into EDX discards the return address before the indirect jump;
  // the exception oop travels in EAX (eAXRegP operand class).
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (Restricted Transactional Memory) variant of FastLock; only selected
// when the compile enabled RTM.  Kills box, tmp, scr and the two extra
// temporaries used by the RTM path.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // Passes the method's MethodData so the RTM path can record profiling;
    // the final flags enable RTM (true) and honor profile_rtm().
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM FastLock: same node shape, fewer temporaries, RTM arguments
// passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast path unlock; a single rule serves both the RTM and non-RTM cases
// (fast_unlock takes use_rtm() as a flag).
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a load of a value that was just stored to the same memory slot:
// the (loadI storeI) pair is replaced by the store alone, eliminating the
// redundant reload.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.